commit 9aaf3ac89600f8e36000a85b98da4000c9a84d3f
Author: Enrico Lumetti <enrico.lumetti@gmail.com>
Date:   Fri Sep 17 20:48:22 2021 +0200

    First version

diff --git a/main.py b/main.py
new file mode 100755
index 0000000..0f922ee
--- /dev/null
+++ b/main.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""Analyze fail2ban logs read from stdin.
+
+Two sub-commands are available: ``rank`` lists the IPs (or IP prefixes) with
+the most hits, ``histogram`` draws a terminal histogram of the hits over time.
+"""
+import argparse
+import re
+import sys
+from collections import defaultdict
+from datetime import datetime
+from itertools import chain, islice
+from typing import Dict, Iterable, List, Tuple
+
+import numpy as np
+import termplotlib as tpl
+
+# Timestamp that opens a log line, e.g. "2021-09-17 20:48:22,123".
+RE_DATE = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})')
+# "[sshd] Found <ip>" or "[sshd] Ban <ip>"; the dots are escaped so that only
+# dotted-quad IPv4 addresses match.
+RE_ENTRY = re.compile(r'\[sshd\] (Found|Ban) (\d+\.\d+\.\d+\.\d+)')
+
+# Maps an IP (or IP prefix) to the timestamps of its log entries.
+Entries = Dict[str, List[datetime]]
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Return the command line parser with the rank and histogram sub-commands."""
+    parser = argparse.ArgumentParser(description='Analyze fail2ban logs')
+    subparsers = parser.add_subparsers(dest='subparser_name', help='sub-command help')
+
+    parser_histogram = subparsers.add_parser(
+        'histogram', help='Show a cli histogram of the hits in time')
+    parser_histogram.add_argument(
+        '--bucket-size', type=int, default=6,
+        help='histogram bucket size in hours, min 1 hour')
+    parser_histogram.add_argument(
+        '--only-bans', help='only count ban hits', action='store_true', default=False)
+
+    parser_rank = subparsers.add_parser(
+        'rank', help='Rank the ip\'s by number of hits', description='''
+Rank the ip\'s by number of hits. For example passing 2 as subnets means that all the
+ips that start with the same two subnets will be counted as one in the final rank''')
+    parser_rank.add_argument(
+        'subnets', type=int, help='Number of consecutive subnets to match.')
+    parser_rank.add_argument(
+        '--count', type=int, help='Show the first count entries', default=10)
+    parser_rank.add_argument(
+        '--only-bans', help='only count ban hits', action='store_true', default=False)
+    return parser
+
+
+def parse_log(lines: Iterable[str], ip_masks: int) -> Tuple[Entries, Entries]:
+    """Collect the timestamps of the sshd "Found" and "Ban" entries in *lines*.
+
+    Each IP is truncated to its first *ip_masks* octets before being used as a
+    key, so addresses that share a prefix are grouped together. Lines without
+    a leading, parseable timestamp are skipped.
+
+    Returns a ``(found_entries, ban_entries)`` pair.
+    """
+    found_entries: Entries = defaultdict(list)
+    ban_entries: Entries = defaultdict(list)
+    for line in lines:
+        match_entry = RE_ENTRY.search(line)
+        if match_entry is None:
+            continue
+        match_date = RE_DATE.search(line)
+        if match_date is None:
+            continue
+        try:
+            # fromisoformat wants a dot, not a comma, before the milliseconds.
+            when = datetime.fromisoformat(match_date.group(1).replace(',', '.'))
+        except ValueError:
+            continue
+
+        entry_type, address = match_entry.groups()
+        ip = '.'.join(address.split('.')[:ip_masks])
+        target = found_entries if entry_type == 'Found' else ban_entries
+        target[ip].append(when)
+    return found_entries, ban_entries
+
+
+def print_rank(entries: Entries, count: int) -> None:
+    """Print the *count* keys of *entries* with the most hits, busiest first."""
+    counted_ips = ((ip, len(dates)) for ip, dates in entries.items())
+    sorted_ips = sorted(counted_ips, key=lambda item: item[1], reverse=True)
+    # islice rejects a negative stop, so clamp a negative --count to zero.
+    for ip, hits in islice(sorted_ips, max(0, count)):
+        print(f'{ip}: found {hits} times')
+
+
+def print_histogram(entries: Entries, bucket_size: int) -> None:
+    """Draw a terminal histogram of all hits in *entries* over time.
+
+    *bucket_size* is the desired bucket width in hours; values below 1 are
+    raised to 1.
+    """
+    timestamps = [d.timestamp() for d in chain.from_iterable(entries.values())]
+    if not timestamps:
+        print('No matching log entries found', file=sys.stderr)
+        return
+
+    bucket_seconds = max(1, bucket_size) * 3600
+    # Size the buckets on the span of the plotted hits: np.histogram spreads its
+    # bins over that same range.
+    span_seconds = max(timestamps) - min(timestamps)
+    num_buckets = max(1, int(span_seconds / bucket_seconds))
+    counts, bin_edges = np.histogram(timestamps, bins=num_buckets)
+    fig = tpl.figure()
+    fig.hist(counts, bin_edges)
+    fig.show()
+
+
+def main() -> None:
+    """Parse the command line, read the log from stdin and run the sub-command."""
+    parser = build_parser()
+    args = parser.parse_args()
+    if args.subparser_name is None:
+        print("Must choose a subprogram {rank, histogram}", file=sys.stderr)
+        parser.print_help()
+        sys.exit(1)
+
+    if args.subparser_name == 'rank':
+        # An IPv4 address has four octets; keep the requested prefix in 1..4.
+        ip_masks = max(min(args.subnets, 4), 1)
+    else:
+        ip_masks = 4
+
+    found_entries, ban_entries = parse_log(sys.stdin, ip_masks)
+    entries = ban_entries if args.only_bans else found_entries
+    if args.subparser_name == 'rank':
+        print_rank(entries, args.count)
+    else:
+        print_histogram(entries, args.bucket_size)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1b56f5a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+numpy==1.21.2
+termplotlib==0.3.8