#!/usr/bin/env python3
"""Analyze fail2ban sshd log lines read from stdin.

Two sub-commands:
  rank       -- rank IPs (optionally grouped by leading subnets) by hit count
  histogram  -- draw a terminal histogram of hits over time
"""
import argparse
import re
import sys
from collections import defaultdict
from datetime import datetime
from itertools import chain, islice

import numpy as np
import termplotlib as tpl

# Leading fail2ban timestamp, e.g. "2021-01-02 03:04:05,123".
RE_DATE = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})')
# sshd hit lines. Dots are escaped so only real dotted quads match
# (the original unescaped '.' matched any character).
RE_FOUND = re.compile(r'\[sshd\] (Found) (\d+\.\d+\.\d+\.\d+)')
RE_BAN = re.compile(r'\[sshd\] (Ban) (\d+\.\d+\.\d+\.\d+)')


def _build_parser():
    """Build the argparse parser with the 'histogram' and 'rank' sub-commands."""
    parser = argparse.ArgumentParser(description='Analyze fail2ban logs')
    subparsers = parser.add_subparsers(dest='subparser_name',
                                       help='sub-command help')

    parser_histogram = subparsers.add_parser(
        'histogram', help='Show a cli histogram of the hits in time')
    parser_histogram.add_argument(
        '--bucket-size', type=int, default=6,
        help='histogram bucket size in hours, min 1 hour')
    parser_histogram.add_argument(
        '--only-bans', action='store_true', default=False,
        help='only count ban hits')

    parser_rank = subparsers.add_parser(
        'rank', help='Rank the ip\'s by number of hits', description='''
    Rank the ip\'s by number of hits.
    For example passing 2 as subnets means that all the ips that start with
    the same two subnets will be counted as one in the final rank''')
    parser_rank.add_argument('subnets', type=int,
                             help='Number of consecutive subnets to match.')
    parser_rank.add_argument('--count', type=int, default=10,
                             help='Show the first count entries')
    parser_rank.add_argument('--only-bans', action='store_true', default=False,
                             help='only count ban hits')
    return parser


def _parse_stdin(ip_masks):
    """Parse fail2ban lines from stdin.

    ip_masks is how many leading octets of each IP to keep (1-4); IPs that
    share those octets are aggregated under one key.

    Returns (found_entries, ban_entries, min_date, max_date) where the entry
    dicts map the (possibly truncated) IP to a list of hit datetimes, and the
    date bounds are None when no hit was seen.
    """
    found_entries = defaultdict(list)
    ban_entries = defaultdict(list)
    min_date = None
    max_date = None
    for line in sys.stdin:
        match_entry = RE_FOUND.search(line) or RE_BAN.search(line)
        if match_entry is None:
            continue
        match_date = RE_DATE.search(line)
        if match_date is None:
            # Hit line with a malformed/missing timestamp: skip instead of
            # crashing on match_date.group(1).
            continue
        # fromisoformat wants '.' (not fail2ban's ',') before the millis.
        d = datetime.fromisoformat(match_date.group(1).replace(',', '.'))
        ip = '.'.join(match_entry.group(2).split('.')[:ip_masks])
        min_date = d if min_date is None else min(min_date, d)
        max_date = d if max_date is None else max(max_date, d)
        if match_entry.group(1) == 'Found':
            found_entries[ip].append(d)
        else:
            ban_entries[ip].append(d)
    return found_entries, ban_entries, min_date, max_date


def _rank(entries, count):
    """Print the top `count` IPs by number of hits, most frequent first."""
    counted_ips = ((ip, len(hits)) for ip, hits in entries.items())
    sorted_ips = sorted(counted_ips, key=lambda x: x[1], reverse=True)
    for ip, count in islice(sorted_ips, count):
        print(f'{ip}: found {count} times')


def _histogram(entries, min_date, max_date, bucket_size):
    """Draw a terminal bar chart of hits bucketed by `bucket_size` hours."""
    timestamps = [d.timestamp()
                  for d in chain.from_iterable(entries.values())]
    if not timestamps:
        # Nothing matched: min_date/max_date are None, so bail out cleanly.
        print('No matching log entries found', file=sys.stderr)
        return
    bucket_size = max(bucket_size, 1)  # enforce the documented 1-hour minimum
    num_buckets = (max_date - min_date).total_seconds() / (bucket_size * 3600)
    num_buckets = max(1, int(num_buckets))
    counts, bin_edges = np.histogram(timestamps, bins=num_buckets)
    ts_edges = [datetime.fromtimestamp(t) for t in bin_edges]
    labels = [start.strftime('%Y-%m-%d %H:%M - ')
              + end.strftime('%Y-%m-%d %H:%M')
              for start, end in zip(ts_edges[:-1], ts_edges[1:])]
    fig = tpl.figure()
    fig.barh(counts, labels)
    fig.show()


def main():
    parser = _build_parser()
    args = parser.parse_args()
    if args.subparser_name is None:
        print("Must choose a subprogram {rank, histogram}", file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    if args.subparser_name == 'rank':
        # Clamp to a valid octet count: at least 1, at most the full quad.
        ip_masks = max(min(args.subnets, 4), 1)
    else:
        ip_masks = 4

    found_entries, ban_entries, min_date, max_date = _parse_stdin(ip_masks)
    entries = ban_entries if args.only_bans else found_entries

    if args.subparser_name == 'rank':
        _rank(entries, args.count)
    else:
        _histogram(entries, min_date, max_date, args.bucket_size)


if __name__ == '__main__':
    main()