about | summary | refs | log | tree | commit | diff | stats
path: root/get-www-stats
diff options
context:
space:
mode:
author Thomas Lange <lange@debian.org> 2023-12-02 21:30:24 +0100
committer Thomas Lange <lange@debian.org> 2023-12-02 21:30:24 +0100
commit cb75e2efed8aa70cd922b767cf7bcc28db4db989 (patch)
tree e5496275a00403c8d04a792588a7ff3af0a80cc5 /get-www-stats
parent eae1b90be1719914b9b6d38800f9e38128aca027 (diff)
move to python3, Closes: #1057284
Diffstat (limited to 'get-www-stats')
-rwxr-xr-x get-www-stats 21
1 files changed, 10 insertions, 11 deletions
diff --git a/get-www-stats b/get-www-stats
index 3df9f0fb537..a1b6183017a 100755
--- a/get-www-stats
+++ b/get-www-stats
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
# get-www-stats - Debian web site popularity statistics
# Copyright 2010 Marcin Owsiany <porridge@debian.org>
@@ -25,6 +25,7 @@ except ImportError:
from gzip import open as gzopen
from glob import glob
+from collections import defaultdict
import logging
import os
import re
@@ -50,14 +51,15 @@ for f in log_files:
else:
logging.warn('Skipping unexpected filename [%s].' % f)
-counts = {}
+counts = defaultdict(int)
for n, logfile, gzipped in sorted(logs):
logging.info('Reading %s.' % logfile)
opener = gzipped and gzopen or open
- for line in opener(logfile):
- line = line.rstrip()
- tokens = line.split()
+ for line in opener(logfile,mode='rt'):
+ tokens = line.split(maxsplit=9)
+ if tokens[8] != '200':
+ continue
if tokens[5] != '"GET':
continue
url = tokens[6]
@@ -70,11 +72,8 @@ for n, logfile, gzipped in sorted(logs):
url = re.sub(r'\.([a-z]{2}|[a-z]{2}-[a-z]{2})\.(html|xml|rdf|pdf)$', '', url)
url = re.sub(r'\.(html|xml|rdf|pdf)(\.([a-z]{2}|[a-z]{2}-[a-z]{2}))?$', '', url)
url = re.sub(r'/$', '/index', url)
- if url in counts:
- counts[url] += 1
- else:
- counts[url] = 1
-
+ counts[url] += 1
+
if '/index' not in counts:
raise Exception('No data for /index')
elif counts['/index'] < 50000:
@@ -82,7 +81,7 @@ elif counts['/index'] < 50000:
elif counts['/index'] < 10000:
raise Exception('Less than 10k hits for /index')
-json.dump(sorted([(v, k) for (k, v) in counts.iteritems() if v > 2], reverse=True),
+json.dump(sorted([(v, k) for (k, v) in counts.items() if v > 2], reverse=True),
sys.stdout,
indent=2)

© 2014-2024 Faster IT GmbH | imprint | privacy policy