From 0fbb2dd290613833965652d50773f9ba44c5ff8a Mon Sep 17 00:00:00 2001
From: Adar Nimrod <nimrod@shore.co.il>
Date: Thu, 20 Jun 2019 00:02:44 +0300
Subject: [PATCH] Added a check for the size difference between the latest 2
 files in the bucket.

- Also, replaced dateutil with pytz.
- And use a prefix instead of a regex (as S3 does).
- Added more sanity checks (e.g. a file timestamp from the future, empty
  files).
---
 README.rst                  |  29 +++++--
 check_s3_bucket/__init__.py | 149 ++++++++++++++++++++++--------------
 setup.py                    |   6 +-
 3 files changed, 117 insertions(+), 67 deletions(-)

diff --git a/README.rst b/README.rst
index 481d875..4f865ea 100644
--- a/README.rst
+++ b/README.rst
@@ -10,19 +10,32 @@ Usage
     $ check_s3_bucket --help
     usage: check_s3_bucket [-h]
-                           bucket [regex] [warning_threshold] [critical_threshold]
+                           bucket [prefix] [age_warning_threshold]
+                           [age_critical_threshold] [size_warning_threshold]
+                           [size_critical_threshold]
 
-    Check that a filename matching the regex was added to the bucket in the given
-    time window.
+    Check that a file was added to an S3 bucket in the given time window and is of
+    a reasonable size.
 
     positional arguments:
-      bucket              S3 bucket to check
-      regex               Filename regex to check (defaults to *)
-      warning_threshold   Warning threshold in hours (defaults to 25)
-      critical_threshold  Critical threshold in hours (defaults to 49)
+      bucket                S3 bucket to check
+      prefix                Filter files by this prefix
+      age_warning_threshold
+                            Warning threshold for the age of the latest file in
+                            hours (defaults to 24)
+      age_critical_threshold
+                            Critical threshold for the age of the latest file in
+                            hours (defaults to 48)
+      size_warning_threshold
+                            Warning threshold for the difference in size between
+                            the latest 2 files in percent (defaults to 25)
+      size_critical_threshold
+                            Critical threshold for the difference in size between
+                            the latest 2 files in percent (defaults to 50)
 
     optional arguments:
-      -h, --help          show this help message and exit
+      -h, --help            show this help message and exit
 
 License
 -------

diff --git a/check_s3_bucket/__init__.py b/check_s3_bucket/__init__.py
index 87dfc40..5bd9b37 100755
--- a/check_s3_bucket/__init__.py
+++ b/check_s3_bucket/__init__.py
@@ -1,119 +1,156 @@
 #!/usr/bin/env python
-"""Check that a filename matching the regex was added to the bucket in the
-given time window."""
+"""Check that a file was added to an S3 bucket in the given time window and is
+of a reasonable size."""
 from __future__ import (
     absolute_import,
     division,
     print_function,
     unicode_literals,
 )
-import datetime
-import re
 import argparse
+import datetime
 
 try:
     import botocore.session
+    import botocore.exceptions
 except ImportError:
     print("Failed to import botocore.")
     exit(3)
 
 try:
-    from dateutil.tz import tzlocal
+    import pytz
 except ImportError:
-    print("Failed to import dateutil.")
+    print("Failed to import pytz.")
     exit(3)
 
 __version__ = "0.1.1"
+NOW = datetime.datetime.now(pytz.utc)
 
 
-def get_file_list(bucket):
+def get_file_list(conn, bucket, prefix=""):
     """Return a list of files in the S3 bucket."""
-    session = botocore.session.get_session()
-    s3client = session.create_client("s3")
     # I'm not concerned with the limitation on the number of keys in the
     # response as the buckets have a lifecycle rule enabled and files are
     # automatically moved out of the bucket.
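+    # (list_objects_v2 returns at most 1000 keys per response, so as long as
+    # the lifecycle rule keeps the bucket small, pagination isn't needed.)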
-    response = s3client.list_objects(Bucket=bucket)
-    return response["Contents"]
+    files = conn.list_objects_v2(Bucket=bucket, Prefix=prefix).get(
+        "Contents", []
+    )
+    files.sort(key=lambda x: x["LastModified"], reverse=True)
+    files = files[:2]
+    for file in files:
+        file["HoursSinceLastModified"] = int(
+            (NOW - file["LastModified"]).total_seconds() / 3600
+        )
+    return files
 
 
 def main():
     """Main entrypoint."""
-    parser = argparse.ArgumentParser(
-        description="""Check that a filename matching the regex was added to the
-        bucket in the given time window."""
-    )
+
+    # Parse the command-line arguments.
+    parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("bucket", help="S3 bucket to check")
     parser.add_argument(
-        "regex",
-        help="Filename regex to check (defaults to *)",
-        nargs="?",
-        default="*",
+        "prefix", help="Filter files by this prefix", nargs="?", default=""
     )
     parser.add_argument(
-        "warning_threshold",
-        help="Warning threshold in hours (defaults to 25)",
+        "age_warning_threshold",
+        help="""Warning threshold for the age of the latest file in hours
+        (defaults to 24)""",
         default=24,
         type=int,
         nargs="?",
     )
     parser.add_argument(
-        "critical_threshold",
-        help="Critical threshold in hours (defaults to 49)",
+        "age_critical_threshold",
+        help="""Critical threshold for the age of the latest file in hours
+        (defaults to 48)""",
         default=48,
         type=int,
         nargs="?",
     )
+    parser.add_argument(
+        "size_warning_threshold",
+        help="""Warning threshold for the difference in size between the
+        latest 2 files in percent (defaults to 25)""",
+        default=25,
+        type=int,
+        nargs="?",
+    )
+    parser.add_argument(
+        "size_critical_threshold",
+        help="""Critical threshold for the difference in size between the
+        latest 2 files in percent (defaults to 50)""",
+        default=50,
+        type=int,
+        nargs="?",
+    )
     args = parser.parse_args()
+
+    # Connect to S3 and get the list of files. ClientError is caught as well
+    # as BotoCoreError because botocore raises it for API-level failures
+    # (like a missing bucket) and it doesn't subclass BotoCoreError.
+    session = botocore.session.get_session()
+    # pylint: disable=invalid-name
+    s3 = session.create_client("s3")
     try:
-        filelist = get_file_list(args.bucket)
-    # pylint: disable=broad-except
-    except BaseException as exception:
-        assert exception
-        print("Failed to list files in bucket.")
+        files = get_file_list(s3, args.bucket, args.prefix)
+    except (
+        botocore.exceptions.BotoCoreError,
+        botocore.exceptions.ClientError,
+    ) as exception:
+        print("Failed to list the files in the S3 bucket.")
+        print(str(exception))
+        exit(3)
+
+    if not files:
+        print("No matching files found in the bucket.")
+        exit(2)
+
+    # Calculate the age of the latest file and check that it's within the
+    # thresholds set.
+    if files[0]["LastModified"] > NOW:
+        print("Latest file is from the future, something is wrong.")
         exit(3)
-    if args.regex != "*":
-        regex = re.compile(args.regex)
-        filelist = filter(
-            lambda x: regex.search(x["Key"]) is not None, filelist
+    age_hours = files[0]["HoursSinceLastModified"]
+    if age_hours > args.age_critical_threshold:
+        print(
+            "Last file modified is older than {} hours.".format(
+                args.age_critical_threshold
+            )
         )
-    if not filelist:
+        exit(2)
+    elif age_hours > args.age_warning_threshold:
         print(
-            'No files matching "{}" found in {}.'.format(
-                args.regex, args.bucket
+            "Last file modified is older than {} hours.".format(
+                args.age_warning_threshold
             )
         )
         exit(1)
-    now = datetime.datetime.now(tz=tzlocal())
-    # pylint: disable=invalid-name
-    LastModifiedDeltas = list(
-        map(
-            lambda x: int((now - x["LastModified"]).total_seconds() / 3600),
-            filelist,
+
+    # Calculate the size ratio between the latest 2 files and check that it's
+    # within the thresholds set.
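+    # For example, a previous file of 100 MB and a latest file of 80 MB give
+    # a difference of 100 * abs((100 - 80) / 100) = 20%.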
+    if files[0]["Size"] == 0:
+        print("Latest file is empty.")
+        exit(2)
+    elif len(files) == 1:
+        print(
+            "Found only 1 file in the bucket, "
+            "can't calculate the size difference."
         )
-    )
-    LastModifiedDeltas.sort()
-    delta = LastModifiedDeltas[0]
-    if delta >= args.critical_threshold:
+        exit(3)
+    elif files[1]["Size"] == 0:
+        print("The previous file is empty, can't calculate the size difference.")
+        exit(3)
+
+    size_ratio = 100 * abs(
+        (files[1]["Size"] - files[0]["Size"]) / files[1]["Size"]
+    )
+    if size_ratio > args.size_critical_threshold:
         print(
-            "Last file modified is older than {} hours.".format(
-                args.critical_threshold
+            "The size difference between the latest 2 files is {}%.".format(
+                size_ratio
             )
         )
         exit(2)
-    elif delta >= args.warning_threshold:
+    elif size_ratio > args.size_warning_threshold:
         print(
-            "Last file modified is older than {} hours.".format(
-                args.warning_threshold
+            "The size difference between the latest 2 files is {}%.".format(
+                size_ratio
             )
         )
         exit(1)
     else:
-        print(
-            "Last file modified is newer than {} hours.".format(
-                args.warning_threshold
-            )
-        )
-        exit(0)
+        print("File found and is within the thresholds set.")
+        exit(0)
 
 
 if __name__ == "__main__":

diff --git a/setup.py b/setup.py
index 2e06d66..c9254a3 100644
--- a/setup.py
+++ b/setup.py
@@ -5,8 +5,8 @@ from setuptools import setup, find_packages
 setup(
     name="check_s3_bucket",
     version="0.1.1",
-    description="""Check that a filename matching the regex was added to the
-    bucket in the given time window.""",
+    description="""Check that a file was added to an S3 bucket in the given time
+    window and is of a reasonable size.""",
     long_description=open("README.rst", "r").read(),
     url="https://www.shore.co.il/git/check_s3_bucket",
     author="Nimrod Adar",
@@ -27,6 +27,6 @@ setup(
     ],
     keywords="nagios s3 aws monitoring",
     packages=find_packages(),
-    install_requires=["python-dateutil", "botocore"],
+    install_requires=["pytz", "botocore"],
     entry_points={"console_scripts": ["check_s3_bucket=check_s3_bucket:main"]},
 )
-- 
GitLab
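A quick sketch of the new interface (the bucket name "nightly-backups" and
the "db-" prefix are hypothetical; the thresholds are just the defaults
spelled out explicitly):

    $ check_s3_bucket nightly-backups db- 24 48 25 50
    File found and is within the thresholds set.
    $ echo $?
    0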