Commit 0fbb2dd2 authored by nimrod

Added a check for the size difference between the latest 2 files in the
bucket.

- Replaced dateutil with pytz.
- Use a prefix instead of a regex to filter files (matching what the S3 API
  supports natively).
- Added more sanity checks (a file timestamp from the future, empty files,
  etc.).
parent a3f45cbe
@@ -10,20 +10,33 @@ Usage
$ check_s3_bucket --help
usage: check_s3_bucket [-h]
                      bucket [prefix] [age_warning_threshold]
                      [age_critical_threshold] [size_warning_threshold]
                      [size_critical_threshold]

Check that a file was added to an S3 bucket in the given time window and is of
a reasonable size.
positional arguments:
  bucket                S3 bucket to check
  prefix                Filter files by this prefix
  age_warning_threshold
                        Warning threshold for the age of the latest file in
                        hours (defaults to 24)
  age_critical_threshold
                        Critical threshold for the age of the latest file in
                        hours (defaults to 48)
  size_warning_threshold
                        Warning threshold for the difference in size between
                        the latest 2 files in percent (defaults to 25)
  size_critical_threshold
                        Critical threshold for the difference in size between
                        the latest 2 files in percent (defaults to 50)

optional arguments:
  -h, --help            show this help message and exit
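
For example, to check that a file whose name starts with ``daily-`` (a
hypothetical prefix) was added to the ``backups`` bucket, passing all of the
default thresholds explicitly::

    $ check_s3_bucket backups daily- 24 48 25 50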
License
-------
......
#!/usr/bin/env python
"""Check that a filename matching the regex was added to the bucket in the
given time window."""
"""Check that a file was added to an S3 bucket in the given time window and is
of a reasonable size."""
from __future__ import (
absolute_import,
division,
print_function,
unicode_literals,
)
import datetime
import re
import argparse
import datetime
try:
    import botocore.session
    import botocore.exceptions
except ImportError:
    print("Failed to import botocore.")
    exit(3)
try:
    import pytz
except ImportError:
    print("Failed to import pytz.")
    exit(3)
__version__ = "0.1.1"
NOW = datetime.datetime.now(pytz.utc)
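
# Exit codes follow the Nagios plugin convention (an assumption based on the
# "nagios" keyword in setup.py): 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.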


def get_file_list(conn, bucket, prefix=""):
    """Return the 2 most recent files in the S3 bucket."""
    # Not concerned with the limitation on the number of keys in the
    # response, as the buckets have a lifecycle rule enabled and files are
    # automatically moved out of the bucket.
    # "Contents" is missing from the response when the bucket is empty,
    # so fall back to an empty list.
    files = conn.list_objects_v2(Bucket=bucket, Prefix=prefix).get(
        "Contents", []
    )
    files.sort(key=lambda x: x["LastModified"], reverse=True)
    files = files[:2]
    for file in files:
        file["HoursSinceLastModified"] = int(
            (NOW - file["LastModified"]).total_seconds() / 3600
        )
    return files
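

# A minimal usage sketch for get_file_list (the bucket and prefix names are
# hypothetical), assuming botocore can find AWS credentials:
#
#     session = botocore.session.get_session()
#     s3client = session.create_client("s3")
#     for file in get_file_list(s3client, "my-backups", prefix="db-"):
#         print(file["Key"], file["Size"], file["HoursSinceLastModified"])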


def main():
    """Main entrypoint."""
    # Parse command line arguments.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("bucket", help="S3 bucket to check")
    parser.add_argument(
        "prefix", help="Filter files by this prefix", nargs="?", default=""
    )
    parser.add_argument(
        "age_warning_threshold",
        help="""Warning threshold for the age of the latest file in hours
        (defaults to 24)""",
        default=24,
        type=int,
        nargs="?",
    )
    parser.add_argument(
        "age_critical_threshold",
        help="""Critical threshold for the age of the latest file in hours
        (defaults to 48)""",
        default=48,
        type=int,
        nargs="?",
    )
    parser.add_argument(
        "size_warning_threshold",
        help="""Warning threshold for the difference in size between the
        latest 2 files in percent (defaults to 25)""",
        default=25,
        type=int,
        nargs="?",
    )
    parser.add_argument(
        "size_critical_threshold",
        help="""Critical threshold for the difference in size between the
        latest 2 files in percent (defaults to 50)""",
        default=50,
        type=int,
        nargs="?",
    )
    args = parser.parse_args()
    # Connect to S3 and get the list of files.
    session = botocore.session.get_session()
    # pylint: disable=invalid-name
    s3 = session.create_client("s3")
    try:
        files = get_file_list(s3, args.bucket, args.prefix)
    except (
        botocore.exceptions.BotoCoreError,
        botocore.exceptions.ClientError,
    ) as exception:
        # ClientError (raised for S3 API errors like a missing bucket) is
        # not a subclass of BotoCoreError, so catch both.
        print("Failed to list the files in the S3 bucket.")
        print(str(exception))
        exit(3)
if args.regex != "*":
regex = re.compile(args.regex)
filelist = filter(
lambda x: regex.search(x["Key"]) is not None, filelist
if not files:
print("Not matching files in bucket.")
exit(2)
# Calculate the age of the latest file and if it's in the thresholds set.
if files[0][""] > NOW:
print("Latest file is from the future, something is wrong.")
exit(3)
    timedelta = files[0]["HoursSinceLastModified"]
    if timedelta > args.age_critical_threshold:
        print(
            "The latest file is older than {} hours.".format(
                args.age_critical_threshold
            )
        )
        exit(2)
    elif timedelta > args.age_warning_threshold:
        print(
            "The latest file is older than {} hours.".format(
                args.age_warning_threshold
            )
        )
        exit(1)
    # Calculate the size ratio between the latest 2 files and check that it's
    # within the thresholds set.
    if files[0]["Size"] == 0:
        print("The latest file is empty.")
        exit(2)
    elif len(files) == 1:
        print(
            "Found only 1 file in the bucket, can't calculate the size "
            "difference."
        )
        exit(3)
    elif files[1]["Size"] == 0:
        print(
            "The next to last file is empty, can't calculate the size "
            "difference."
        )
        exit(3)
    size_ratio = 100 * abs(
        (files[1]["Size"] - files[0]["Size"]) / files[1]["Size"]
    )
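    # For example (hypothetical sizes): if files[1] is 100 MB and files[0] is
    # 60 MB, size_ratio is 100 * abs((100 - 60) / 100) = 40.0, above the
    # default 25% warning threshold but below the 50% critical one.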
    if size_ratio > args.size_critical_threshold:
        print(
            "The size difference between the latest 2 files is {}%.".format(
                size_ratio
            )
        )
        exit(2)
    elif size_ratio > args.size_warning_threshold:
        print(
            "The size difference between the latest 2 files is {}%.".format(
                size_ratio
            )
        )
        exit(1)
    print("File found and is within the thresholds set.")
    exit(0)


if __name__ == "__main__":
    main()
@@ -5,8 +5,8 @@ from setuptools import setup, find_packages
setup(
    name="check_s3_bucket",
    version="0.1.1",
    description="""Check that a file was added to an S3 bucket in the given
    time window and is of a reasonable size.""",
    long_description=open("README.rst", "r").read(),
    url="https://www.shore.co.il/git/check_s3_bucket",
    author="Nimrod Adar",
@@ -27,6 +27,6 @@ setup(
    ],
    keywords="nagios s3 aws monitoring",
    packages=find_packages(),
    install_requires=["pytz", "botocore"],
    entry_points={"console_scripts": ["check_s3_bucket=check_s3_bucket:main"]},
)
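
With the console_scripts entry point above, installing the package (for
example with "pip install ." from a checkout of the repository) provides the
check_s3_bucket command used in the README:

    $ pip install .
    $ check_s3_bucket --help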