move to python3, Closes: #1057284

author: Thomas Lange <lange@debian.org> 2023-12-02 21:30:24 +0100
committer: Thomas Lange <lange@debian.org> 2023-12-02 21:30:24 +0100
commit: cb75e2efed8aa70cd922b767cf7bcc28db4db989 (patch)
tree: e5496275a00403c8d04a792588a7ff3af0a80cc5 /get-www-stats
parent: eae1b90be1719914b9b6d38800f9e38128aca027 (diff)
1 files changed, 10 insertions, 11 deletions
diff --git a/get-www-stats b/get-www-stats
index 3df9f0fb537..a1b6183017a 100755
--- a/get-www-stats
+++ b/get-www-stats
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 # get-www-stats - Debian web site popularity statistics
 # Copyright 2010 Marcin Owsiany <porridge@debian.org>
@@ -25,6 +25,7 @@ except ImportError:
 
 from gzip import open as gzopen
 from glob import glob
+from collections import defaultdict
 import logging
 import os
 import re
@@ -50,14 +51,15 @@ for f in log_files:
   else:
-    logging.warn('Skipping unexpected filename [%s].' % f)
+    logging.warning('Skipping unexpected filename [%s].' % f)
 
-counts = {}
+counts = defaultdict(int)
 
 for n, logfile, gzipped in sorted(logs):
   logging.info('Reading %s.' % logfile)
   opener = gzipped and gzopen or open
-  for line in opener(logfile):
-    line = line.rstrip()
-    tokens = line.split()
+  for line in opener(logfile, mode='rt', errors='replace'):
+    tokens = line.split(maxsplit=9)
+    if len(tokens) < 9 or tokens[8] != '200':
+        continue
     if tokens[5] != '"GET':
         continue
     url = tokens[6]
@@ -70,11 +72,8 @@ for n, logfile, gzipped in sorted(logs):
     url = re.sub(r'\.([a-z]{2}|[a-z]{2}-[a-z]{2})\.(html|xml|rdf|pdf)$', '', url)
     url = re.sub(r'\.(html|xml|rdf|pdf)(\.([a-z]{2}|[a-z]{2}-[a-z]{2}))?$', '', url)
     url = re.sub(r'/$', '/index', url)
-    if url in counts:
-      counts[url] += 1
-    else:
-      counts[url] = 1
-  
+    counts[url] += 1
+
 if '/index' not in counts:
   raise Exception('No data for /index')
 elif counts['/index'] < 50000:
@@ -82,7 +81,7 @@ elif counts['/index'] < 50000:
 elif counts['/index'] < 10000:
   raise Exception('Less than 10k hits for /index')
 
-json.dump(sorted([(v, k) for (k, v) in counts.iteritems() if v > 2], reverse=True),
+json.dump(sorted([(v, k) for (k, v) in counts.items() if v > 2], reverse=True),
           sys.stdout,
           indent=2)
author	Thomas Lange <lange@debian.org>	2023-12-02 21:30:24 +0100
committer	Thomas Lange <lange@debian.org>	2023-12-02 21:30:24 +0100
commit	cb75e2efed8aa70cd922b767cf7bcc28db4db989 (patch)
tree	e5496275a00403c8d04a792588a7ff3af0a80cc5 /get-www-stats
parent	eae1b90be1719914b9b6d38800f9e38128aca027 (diff)