diff options
author | Thomas Lange <lange@debian.org> | 2023-12-02 21:30:24 +0100 |
---|---|---|
committer | Thomas Lange <lange@debian.org> | 2023-12-02 21:30:24 +0100 |
commit | cb75e2efed8aa70cd922b767cf7bcc28db4db989 (patch) | |
tree | e5496275a00403c8d04a792588a7ff3af0a80cc5 /get-www-stats | |
parent | eae1b90be1719914b9b6d38800f9e38128aca027 (diff) |
move to python3, Closes: #1057284
Diffstat (limited to 'get-www-stats')
-rwxr-xr-x | get-www-stats | 21 |
1 file changed, 10 insertions, 11 deletions
```diff
diff --git a/get-www-stats b/get-www-stats
index 3df9f0fb537..a1b6183017a 100755
--- a/get-www-stats
+++ b/get-www-stats
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # get-www-stats - Debian web site popularity statistics
 # Copyright 2010 Marcin Owsiany <porridge@debian.org>
@@ -25,6 +25,7 @@ except ImportError:
     from gzip import open as gzopen
 from glob import glob
+from collections import defaultdict
 import logging
 import os
 import re
@@ -50,14 +51,15 @@ for f in log_files:
     else:
         logging.warn('Skipping unexpected filename [%s].' % f)
-counts = {}
+counts = defaultdict(int)
 for n, logfile, gzipped in sorted(logs):
     logging.info('Reading %s.' % logfile)
     opener = gzipped and gzopen or open
-    for line in opener(logfile):
-        line = line.rstrip()
-        tokens = line.split()
+    for line in opener(logfile,mode='rt'):
+        tokens = line.split(maxsplit=9)
+        if tokens[8] != '200':
+            continue
         if tokens[5] != '"GET':
             continue
         url = tokens[6]
@@ -70,11 +72,8 @@ for n, logfile, gzipped in sorted(logs):
         url = re.sub(r'\.([a-z]{2}|[a-z]{2}-[a-z]{2})\.(html|xml|rdf|pdf)$', '', url)
         url = re.sub(r'\.(html|xml|rdf|pdf)(\.([a-z]{2}|[a-z]{2}-[a-z]{2}))?$', '', url)
         url = re.sub(r'/$', '/index', url)
-        if url in counts:
-            counts[url] += 1
-        else:
-            counts[url] = 1
-
+        counts[url] += 1
+
 if '/index' not in counts:
     raise Exception('No data for /index')
 elif counts['/index'] < 50000:
@@ -82,7 +81,7 @@ elif counts['/index'] < 50000:
 elif counts['/index'] < 10000:
     raise Exception('Less than 10k hits for /index')
-json.dump(sorted([(v, k) for (k, v) in counts.iteritems() if v > 2], reverse=True),
+json.dump(sorted([(v, k) for (k, v) in counts.items() if v > 2], reverse=True),
           sys.stdout, indent=2)
```