#!/usr/bin/env python3
"""Analyze fail2ban logs read from standard input.

Two sub-commands are available:

* ``histogram`` -- draw a terminal histogram of the hits over time.
* ``rank``      -- rank IP addresses (or address prefixes) by number of hits.

Only ``[sshd] Found <ip>`` and ``[sshd] Ban <ip>`` log lines are considered.
"""
# Provenance: main.py as introduced by commit
# 9aaf3ac89600f8e36000a85b98da4000c9a84d3f ("First version"),
# Author: Enrico Lumetti, Date: Fri Sep 17 20:48:22 2021 +0200.
# The same commit adds requirements.txt pinning:
#   numpy==1.21.2
#   termplotlib==0.3.8

import argparse
import re
import sys
from collections import defaultdict
from datetime import datetime
from itertools import chain, islice

# Raw strings keep the regex escapes intact, and the dots between the octets
# are escaped so that only dotted-quad addresses match.
RE_DATE = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})')
RE_FOUND = re.compile(r'\[sshd\] (Found) (\d+\.\d+\.\d+\.\d+)')
RE_BAN = re.compile(r'\[sshd\] (Ban) (\d+\.\d+\.\d+\.\d+)')

# Layout of the timestamp captured by RE_DATE, e.g. "2021-09-17 20:48:22,123".
DATE_FORMAT = '%Y-%m-%d %H:%M:%S,%f'

SECONDS_PER_HOUR = 3600


def build_parser():
    """Return the command-line parser with the ``histogram`` and ``rank`` sub-commands."""
    parser = argparse.ArgumentParser(description='Analyze fail2ban logs')
    subparsers = parser.add_subparsers(dest='subparser_name', help='sub-command help')

    parser_histogram = subparsers.add_parser(
        'histogram', help='Show a cli histogram of the hits in time')
    parser_histogram.add_argument(
        '--bucket-size', type=int, default=6,
        help='histogram bucket size in hours, min 1 hour')
    parser_histogram.add_argument(
        '--only-bans', help='only count ban hits', action='store_true', default=False)

    parser_rank = subparsers.add_parser(
        'rank', help="Rank the ip's by number of hits", description='''
Rank the ip's by number of hits.
For example passing 2 as subnets means that all the
ips that start with the same two subnets will be counted as one in the final rank''')
    parser_rank.add_argument(
        'subnets', type=int, help='Number of consecutive subnets to match.')
    parser_rank.add_argument(
        '--count', type=int, help='Show the first count entries', default=10)
    parser_rank.add_argument(
        '--only-bans', help='only count ban hits', action='store_true', default=False)
    return parser


def parse_log(lines, ip_masks=4):
    """Collect the timestamps of the sshd ``Found`` and ``Ban`` log entries.

    Args:
        lines: iterable of log lines (for example ``sys.stdin``).
        ip_masks: number of leading address octets used as the grouping key;
            4 keeps full addresses apart, 2 groups ``a.b.c.d`` under ``a.b``.

    Returns:
        A ``(found_entries, ban_entries)`` pair of dicts, each mapping the
        address prefix to the list of ``datetime`` hits in input order.
        Lines that match neither pattern, or that do not start with a
        timestamp, are skipped.
    """
    found_entries = defaultdict(list)
    ban_entries = defaultdict(list)
    for line in lines:
        match_entry = RE_FOUND.search(line) or RE_BAN.search(line)
        if match_entry is None:
            continue
        match_date = RE_DATE.search(line)
        if match_date is None:
            # An entry without a leading timestamp cannot be placed in time.
            continue
        when = datetime.strptime(match_date.group(1), DATE_FORMAT)
        prefix = '.'.join(match_entry.group(2).split('.')[:ip_masks])
        bucket = found_entries if match_entry.group(1) == 'Found' else ban_entries
        bucket[prefix].append(when)
    return dict(found_entries), dict(ban_entries)


def rank_entries(entries, count):
    """Return up to ``count`` ``(prefix, hits)`` pairs, most hits first.

    Ties keep the order in which the prefixes first appeared in ``entries``.
    A negative ``count`` is treated as zero.
    """
    counted = ((prefix, len(hits)) for prefix, hits in entries.items())
    ordered = sorted(counted, key=lambda pair: pair[1], reverse=True)
    return list(islice(ordered, max(count, 0)))


def bucket_count(timestamps, bucket_size):
    """Return how many histogram buckets of ``bucket_size`` hours cover ``timestamps``.

    The span is measured on the timestamps that are actually plotted, so the
    resulting bins have the requested width. ``bucket_size`` is clamped to a
    minimum of one hour and the result is always at least 1.
    """
    if not timestamps:
        return 1
    span_seconds = max(timestamps) - min(timestamps)
    bucket_seconds = max(1, bucket_size) * SECONDS_PER_HOUR
    return max(1, int(span_seconds / bucket_seconds))


def show_histogram(entries, bucket_size):
    """Print a terminal histogram of every hit in ``entries`` over time.

    Args:
        entries: dict mapping address prefix to a list of ``datetime`` hits.
        bucket_size: requested bucket width in hours (minimum 1).
    """
    timestamps = [hit.timestamp() for hit in chain.from_iterable(entries.values())]
    if not timestamps:
        print('No matching log entries found', file=sys.stderr)
        return

    # Imported here so the `rank` sub-command works without the plotting stack.
    import numpy as np
    import termplotlib as tpl

    counts, bin_edges = np.histogram(
        timestamps, bins=bucket_count(timestamps, bucket_size))
    fig = tpl.figure()
    fig.hist(counts, bin_edges)
    fig.show()


def main(argv=None):
    """Run the command-line tool; ``argv`` defaults to ``sys.argv[1:]``."""
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.subparser_name is None:
        print("Must choose a subprogram {rank, histogram}", file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    if args.subparser_name == 'rank':
        # An IPv4 address has four octets; keep the prefix length within 1..4.
        ip_masks = max(min(args.subnets, 4), 1)
    else:
        ip_masks = 4

    found_entries, ban_entries = parse_log(sys.stdin, ip_masks)
    entries = ban_entries if args.only_bans else found_entries

    if args.subparser_name == 'rank':
        for ip, count in rank_entries(entries, args.count):
            print(f'{ip}: found {count} times')
    else:
        show_histogram(entries, args.bucket_size)


if __name__ == '__main__':
    main()