Skip to content
Snippets Groups Projects
Commit 0fbb2dd2 authored by nimrod's avatar nimrod
Browse files

Added check for size difference between the latest 2 files in the

bucket.

- Also, replaced dateutil with pytz.
- And use prefix instead of regex (as does S3).
- Added more sanity checks (such as whether the file timestamp is from the
future, whether files are empty, etc.).
parent a3f45cbe
No related branches found
No related tags found
No related merge requests found
...@@ -10,20 +10,33 @@ Usage ...@@ -10,20 +10,33 @@ Usage
$ check_s3_bucket --help $ check_s3_bucket --help
usage: check_s3_bucket [-h] usage: check_s3_bucket [-h]
bucket [regex] [warning_threshold] [critical_threshold] bucket [prefix] [age_warning_threshold]
[age_critical_threshold] [size_warning_threshold]
[size_critical_threshold]
Check that a filename matching the regex was added to the bucket in the given Check that a file was added to an S3 bucket in the given time window and is of
time window. a reasonable size.
positional arguments: positional arguments:
bucket S3 bucket to check bucket S3 bucket to check
regex Filename regex to check (defaults to *) prefix Filter files by this prefix
warning_threshold Warning threshold in hours (defaults to 25) age_warning_threshold
critical_threshold Critical threshold in hours (defaults to 49) Warning threshold for the age of the latest file in
hours (defaults to 24)
age_critical_threshold
Critical threshold for the age of the latest file in
hours (defaults to 48)
size_warning_threshold
Warning threshold for the difference in size between
the latest 2 files in percent (defaults to 25)
size_critical_threshold
Critical threshold for the difference in size between
the latest 2 files in percent (defaults to 50)
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
License License
------- -------
......
#!/usr/bin/env python #!/usr/bin/env python
"""Check that a filename matching the regex was added to the bucket in the """Check that a file was added to an S3 bucket in the given time window and is
given time window.""" of a reasonable size."""
from __future__ import ( from __future__ import (
absolute_import, absolute_import,
division, division,
print_function, print_function,
unicode_literals, unicode_literals,
) )
import datetime
import re
import argparse import argparse
import datetime
try: try:
import botocore.session import botocore.session
import botocore.exceptions
except ImportError: except ImportError:
print("Failed to import botocore.") print("Failed to import botocore.")
exit(3) exit(3)
try: try:
from dateutil.tz import tzlocal import pytz
except ImportError: except ImportError:
print("Failed to import dateutil.") print("Failed to import pytz.")
exit(3) exit(3)
__version__ = "0.1.1" __version__ = "0.1.1"
NOW = datetime.datetime.now(pytz.utc)
def get_file_list(bucket): def get_file_list(conn, bucket, prefix=""):
"""Return a list of files in the S3 bucket.""" """Return a list of files in the S3 bucket."""
session = botocore.session.get_session()
s3client = session.create_client("s3")
# I'm not concerened with the limitation of number of keys in the # I'm not concerened with the limitation of number of keys in the
# response as the buckets have a lifecycle rule enabled and files are # response as the buckets have a lifecycle rule enabled and files are
# automatically moved of the bucket. # automatically moved of the bucket.
response = s3client.list_objects(Bucket=bucket) files = conn.list_objects_v2(Bucket=bucket, Prefix=prefix)["Contents"]
return response["Contents"] files.sort(key=lambda x: x["LastModified"], reverse=True)
files = files[:2]
for file in files:
file["HoursSinceLastModified"] = int(
(NOW - file["LastModified"]).total_seconds() / 3600
)
return files
def main():
    """Main entrypoint.

    Nagios-style exit codes: 0 OK, 1 warning, 2 critical, 3 unknown.
    """
    # Parse command line arguments.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("bucket", help="S3 bucket to check")
    parser.add_argument(
        "prefix", help="Filter files by this prefix", nargs="?", default=""
    )
    parser.add_argument(
        "age_warning_threshold",
        help="""Warning threshold for the age of the latest file in hours
        (defaults to 24)""",
        default=24,
        type=int,
        nargs="?",
    )
    parser.add_argument(
        "age_critical_threshold",
        help="""Critical threshold for the age of the latest file in hours
        (defaults to 48)""",
        default=48,
        type=int,
        nargs="?",
    )
    parser.add_argument(
        "size_warning_threshold",
        help="""Warning threshold for the difference in size between the
        latest 2 files in percents (defaults to 25)""",
        default=25,
        type=int,
        nargs="?",
    )
    parser.add_argument(
        "size_critical_threshold",
        help="""Critical threshold for the difference in size between the
        latest 2 files in percents (defaults to 50)""",
        default=50,
        type=int,
        nargs="?",
    )
    args = parser.parse_args()
    # Connect to S3, get list of files.
    session = botocore.session.get_session()
    # pylint: disable=invalid-name
    s3 = session.create_client("s3")
    try:
        files = get_file_list(s3, args.bucket, args.prefix)
    except botocore.exceptions.BotoCoreError as exception:
        print("Failed to list the files in the S3 bucket.")
        print(str(exception))
        exit(3)
    if not files:
        # BUG FIX: message previously read "Not matching files in bucket."
        print("No matching files in bucket.")
        exit(2)
    # Calculate the age of the latest file and check it against the
    # thresholds set.
    # BUG FIX: the original compared files[0][""] (an empty-string key that
    # never exists, so this always raised KeyError); the intent is clearly
    # the LastModified timestamp.
    if files[0]["LastModified"] > NOW:
        print("Latest file is from the future, something is wrong.")
        exit(3)
    timedelta = files[0]["HoursSinceLastModified"]
    if timedelta > args.age_critical_threshold:
        print(
            "Last file modified is older than {} hours.".format(
                args.age_critical_threshold
            )
        )
        exit(2)
    elif timedelta > args.age_warning_threshold:
        print(
            "Last file modified is older than {} hours.".format(
                args.age_warning_threshold
            )
        )
        exit(1)
    # Calculate the size ratio between the latest 2 files and check if
    # it's in the threshold set.
    if files[0]["Size"] == 0:
        print("Latest file is empty.")
        exit(2)
    elif len(files) == 1:
        # BUG FIX: was a triple-quoted string that embedded source
        # indentation and a newline into the printed message.
        print("Found only 1 file in the bucket, can't calculate size difference.")
        exit(3)
    elif files[1]["Size"] == 0:
        print("The last but 1 file is empty, can't calculate size difference.")
        exit(3)
    # BUG FIX: the original subtracted the object dicts themselves
    # (files[1] - files[0]), a TypeError; subtract their "Size" values.
    size_ratio = 100 * abs(
        (files[1]["Size"] - files[0]["Size"]) / files[1]["Size"]
    )
    if size_ratio > args.size_critical_threshold:
        print(
            "The size difference between the latest 2 files is {}%.".format(
                size_ratio
            )
        )
        exit(2)
    elif size_ratio > args.size_warning_threshold:
        print(
            "The size difference between the latest 2 files is {}%.".format(
                size_ratio
            )
        )
        exit(1)
    else:
        print("File found and is within the thresholds set.")
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -5,8 +5,8 @@ from setuptools import setup, find_packages ...@@ -5,8 +5,8 @@ from setuptools import setup, find_packages
setup( setup(
name="check_s3_bucket", name="check_s3_bucket",
version="0.1.1", version="0.1.1",
description="""Check that a filename matching the regex was added to the description="""Check that a file was added to an S3 bucket in the given time
bucket in the given time window.""", window and is of a reasonable size.""",
long_description=open("README.rst", "r").read(), long_description=open("README.rst", "r").read(),
url="https://www.shore.co.il/git/check_s3_bucket", url="https://www.shore.co.il/git/check_s3_bucket",
author="Nimrod Adar", author="Nimrod Adar",
...@@ -27,6 +27,6 @@ setup( ...@@ -27,6 +27,6 @@ setup(
], ],
keywords="nagios s3 aws monitoring", keywords="nagios s3 aws monitoring",
packages=find_packages(), packages=find_packages(),
install_requires=["python-dateutil", "botocore"], install_requires=["pytz", "botocore"],
entry_points={"console_scripts": ["check_s3_bucket=check_s3_bucket:main"]}, entry_points={"console_scripts": ["check_s3_bucket=check_s3_bucket:main"]},
) )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment