blob: 7dd010fdc8187489d9240624d21a37ed38e8cc62 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
# -*- coding: utf-8 -*-
# oval.parser.wml - module to parse descriptions of
# Debian Security Advisories stored in wml format.
# Extracted tags:
# <description>
# <moreinfo>- Paragraphs before descriptions of
# each release status
#
# (c) 2007 Pavel Vinogradov
# (c) 2004 Javier Fernandez-Sanguino
# Licensed under the GNU General Public License version 2.
import io
import logging
import os
import re
import sys
# Format of wml files is:
#<define-tag description>DESCRIPTION</define-tag>
#<define-tag moreinfo>Multiline information</define-tag>
def parseFile(path):
    """ Parse wml file with description of Debian Security Advisories

    The DSA number is taken from the file name (it must contain
    "dsa-<digits>"). The file is read as ISO-8859-2 text and scanned line
    by line for the two define-tags:

    description -- text between "description>" and "</define-tag>" on a
                   single line
    moreinfo    -- everything between "<define-tag moreinfo>" and the next
                   "</define-tag>", reduced by __parseMoreinfo

    Keyword arguments:
    path -- full path to wml file

    return tuple (dsa id, tags data), or None when the file name does not
    contain a DSA number. tags data is a dict that may hold the keys
    "description" and "moreinfo"; it is empty or partial when the file
    cannot be read (the IOError is logged, not raised)."""
    filename = os.path.basename(path)

    idMatch = re.search(r'dsa-(\d+)', filename)
    if not idMatch:
        logging.warning("File %s does not look like a proper DSA wml description, not checking", filename)
        return None
    dsa = idMatch.group(1)

    logging.debug("Parsing information for DSA %s from wml file %s", dsa, filename)

    # Compiled once here instead of once per input line.
    descrpatern = re.compile(r'description>(.*?)</define-tag>')
    sinfopatern = re.compile(r'<define-tag moreinfo>')
    einfopatern = re.compile(r'</define-tag>')

    data = {}
    moreinfo = False  # True while inside an open moreinfo tag

    try:
        # io.open decodes at the I/O boundary on both Python 2 and 3, and
        # the with-statement guarantees the handle is closed.
        with io.open(path, encoding="ISO-8859-2") as wmlFile:
            for line in wmlFile:
                result = descrpatern.search(line)
                if result:
                    data["description"] = result.group(1)
                    continue

                result = sinfopatern.search(line)
                if result:
                    # Keep whatever follows the opening tag on this line.
                    rest = line[result.end():]
                    closing = einfopatern.search(rest)
                    if closing:
                        # Tag opened and closed on the same line.
                        data["moreinfo"] = __parseMoreinfo(rest[:closing.start()])
                    else:
                        moreinfo = True
                        # A tag alone on its line contributes nothing.
                        data["moreinfo"] = rest if rest.strip() else ""
                    continue

                if moreinfo:
                    closing = einfopatern.search(line)
                    if closing:
                        # Include the text that precedes the closing tag.
                        data["moreinfo"] = __parseMoreinfo(
                            data["moreinfo"] + line[:closing.start()])
                        moreinfo = False
                    else:
                        data["moreinfo"] += line
    except IOError:
        logging.error("Can't work with file %s", path)

    return (dsa, data)
def __parseMoreinfo (info):
    """ Keep only the leading paragraphs of a moreinfo tag

    Collects the contents of the <p>...</p> elements found in info, in
    order, and stops at the first one that begins with
    "For the ... distribution". Every kept paragraph is prefixed with a
    newline; returns "" when nothing is kept."""
    stopMarker = re.compile("For the .* distribution")
    kept = []
    for found in re.finditer("<p>(.*?)</p>", info, re.DOTALL):
        text = found.group(1)
        # match() anchors at the start of the paragraph text.
        if stopMarker.match(text):
            break
        kept.append("\n" + text)
    return "".join(kept)
|