summaryrefslogtreecommitdiffstats
path: root/lib/python/sectracker/repo.py
blob: 7e6d454f51918ff713f46229460caf7310ba5f74 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# sectracker.repo -- mirror Debian repository metadata
# Copyright (C) 2010 Florian Weimer <fw@deneb.enyo.de>
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import with_statement

import bz2 as _bz2
import cjson as _cjson
import hashlib as _hashlib
import gzip as _gzip
import os as _os
import re as _re
import tempfile as _tempfile
import urllib as _urllib

import debian_support as _debian_support
import sectracker.xpickle as _xpickle
import sectracker.parsers as _parsers

MARKER_NAME = "DEBIAN_REPO_MIRROR"

_re_name = _re.compile(r'^[a-z0-9-]+$')
_re_hashentry = _re.compile('^\s*([0-9a-fA-F]{20,})\s+(\d+)\s+(\S+)$')

def _splitfield(data, field):
    tup = tuple(data[field].strip().split())
    if tup == ():
        data[field] = ('',)
    else:
        data[field] = tup

def _splithashes(path, data, field):
    """Replace data[field], a multi-line checksum listing from a Release
    file, with a dictionary mapping file names to hex digests, in place.

    Raises ValueError if a non-empty line does not match the expected
    "<digest> <size> <name>" layout; path is only used in that message."""
    byname = {}
    for entry in data[field].split('\n'):
        if not entry:
            continue
        parsed = _re_hashentry.match(entry)
        if parsed is None:
            raise ValueError("invalid line in %r: %r" % (path, entry))
        digest, _size, filename = parsed.groups()
        byname[filename] = digest
    data[field] = byname

def _parserelease(path, f):
    """Parse a Release file (open file object f, name path for error
    messages) into a dictionary.

    Field names are lower-cased; "components" and "architectures" become
    tuples of words, and the hash list fields become dictionaries mapping
    file names to digests."""
    data = {}
    for record in _debian_support.PackageFile(path, f):
        data.update((name.lower(), value) for name, value in record)
        break  # a Release file contains a single record
    for listfield in ("components", "architectures"):
        _splitfield(data, listfield)
    for hashfield in ("md5sum", "sha1", "sha256"):
        _splithashes(path, data, hashfield)
    return data

def _unbzip2hash(src, dst):
    dec = _bz2.BZ2Decompressor()
    digest = _hashlib.sha256()
    while True:
        data = src.read(8192)
        if data == '':
            break
        data = dec.decompress(data)
        dst.write(data)
        digest.update(data)
    return digest.hexdigest()

def _downloadbz2(url, target, expecteddigest):
    """Fetch the bzip2-compressed file at url, store it decompressed at
    target, and return True only if the download succeeded and the
    SHA-256 digest of the decompressed data equals expecteddigest."""
    try:
        src = _urllib.urlopen(url)
        try:
            digest = _xpickle.replacefile(
                target, lambda fname, f: _unbzip2hash(src, f))
        finally:
            src.close()
        return digest == expecteddigest
    except IOError:
        # Any network or file system failure counts as "not downloaded".
        return False

def _downloadgz(url, target, expecteddigest):
    """Fetch the gzip-compressed file at url, store it decompressed at
    target, and return True only if the download succeeded and the
    SHA-256 digest of the decompressed data equals expecteddigest.

    The download goes through a temporary file because GzipFile needs a
    seekable file to decompress from."""
    with _tempfile.NamedTemporaryFile() as t:
        try:
            _urllib.urlretrieve(url, t.name)
        except IOError:
            return False
        gfile = _gzip.GzipFile(t.name)
        try:
            def copy(fname, f):
                # Stream-decompress into f, hashing as we go.
                digest = _hashlib.sha256()
                while True:
                    data = gfile.read(8192)
                    # "not data" detects EOF for str and bytes alike.
                    if not data:
                        break
                    f.write(data)
                    digest.update(data)
                return digest.hexdigest() == expecteddigest
            return _xpickle.replacefile(target, copy)
        finally:
            gfile.close()
    # The trailing "return True" after the with block was unreachable
    # (every path above returns) and has been removed.

class RepoCollection(object):
    """Maintains a local mirror of Sources metadata for a set of Debian
    repositories.

    The mirror directory is flat: Release files are stored as r_<name>,
    and uncompressed Sources files as h_<sha256-digest> (content-addressed,
    so an unchanged Sources file is only downloaded once)."""

    def __init__(self, root):
        """Creates a new repository mirror.
        
        root: path in the local file system"""
        self.root = root
        self.repos = {}       # repository name -> base URL (trailing slash)
        self.used = ()        # file names touched by the last update()
        self.releases = None
        self.verbose = False  # when True, warn() prints its message

        if not _os.path.exists(root):
            _os.makedirs(root)
        l = _os.listdir(root)
        if len(l) == 0:
            # Claim the fresh directory by creating the marker file.
            # open() replaces the Python-2-only file() builtin.
            open(root + "/" + MARKER_NAME, "w").close()
        elif MARKER_NAME not in l:
            # Refuse to operate on a non-empty directory we did not create.
            raise ValueError("not a Debian repository mirror directory: "
                             + repr(root))

    def add(self, name, url):
        """Adds a repository, given its name and the root URL"""
        if _re_name.match(name) is None:
            raise ValueError("invalid repository name: " + repr(name))
        if name in self.repos:
            raise ValueError("repository already registered: " + repr(name))
        if url[-1:] != '/':
            url += '/'
        self.repos[name] = url

    def update(self):
        """Downloads Release and Sources files for every registered
        repository.  Already-mirrored Sources files (same digest) are
        skipped; failures are reported through warn() and do not abort
        the rest of the update."""
        self._initused()
        for (name, url) in self.repos.items():
            if not self._updatelrelease(name):
                continue
            if not self.hasrelease(name):
                continue
            rel = self.release(name)
            hashes = rel["sha256"]
            for comp in rel["components"]:
                plainpath = self._sourcepath(comp)
                plainurl = url + plainpath
                # Only the digest of the uncompressed file (from the Release
                # file) can authenticate the download; skip files without one.
                if not plainpath in hashes:
                    self.warn("not downloaded because uncompressed version not present in Release file: " + plainurl)
                    continue
                uncompressed_digest = hashes[plainpath]
                listname = self._listname(uncompressed_digest)
                if _os.path.exists(listname):
                    continue  # content-addressed: already mirrored
                success = False
                # Prefer the bzip2 variant, fall back to gzip.
                for suffix, method in ((".bz2", _downloadbz2),
                                       (".gz", _downloadgz)):
                    if method(plainurl + suffix, listname,
                              uncompressed_digest):
                        success = True
                        break
                if not success:
                    self.warn("download failed: " + plainurl)

    def _updatelrelease(self, name):
        """Downloads the Release file for the named repository into the
        mirror; returns False (after a warning) on failure."""
        url = self.repos[name]
        relname = self._relname(name)
        self._markused(relname)
        try:
            def download(fname, f):
                _urllib.urlretrieve(url + 'Release', fname)
            _xpickle.replacefile(relname, download)
            return True
        except IOError:
            self.warn("download of Release file failed: " + url)
            return False

    def hasrelease(self, name):
        """Returns True if a Release file has been mirrored for name."""
        if name not in self.repos:
            raise ValueError("name not registered: " + repr(name))
        return _os.path.exists(self._relname(name))

    def release(self, name):
        """Parses and returns the mirrored Release file for name."""
        if name not in self.repos:
            raise ValueError("name not registered: " + repr(name))
        # open() replaces the Python-2-only file() builtin.
        with open(self._relname(name)) as f:
            return _parserelease(name, f)

    def filemap(self, load=False):
        """Returns dictionaries mapping repositories to components to files.
        If load is true, the files are loaded using the source packages
        parser."""
        d = {}
        for name in self.repos:
            rel = self.release(name)
            hashes = rel["sha256"]
            comps = {}
            for comp in rel["components"]:
                plainpath = self._sourcepath(comp)
                if not plainpath in hashes:
                    self.warn("failed to find %s/%s" % (name, comp))
                    continue
                digest = hashes[plainpath]
                listname = self._listname(digest)
                if not _os.path.exists(listname):
                    self.warn("file %s for %s/%s not present" %
                              (listname, name, comp))
                    continue
                if load:
                    comps[comp] = _parsers.sourcepackages(listname)
                else:
                    comps[comp] = listname
            d[name] = comps
        return d

    def _relname(self, name):
        # Mirror path of the Release file for a repository.
        return "%s/r_%s" % (self.root, name)

    def _sourcepath(self, comp):
        # Hack to deal with the "updates/" special case.
        comp = comp.split("/")[-1]
        return comp + "/source/Sources"

    def _listname(self, digest):
        # Content-addressed mirror path of an uncompressed Sources file.
        return "%s/h_%s" % (self.root, digest)

    def _initused(self):
        # Reset used-file tracking; the marker file is always kept.
        self.used = set()
        self.used.add("%s/%s" % (self.root, MARKER_NAME))

    def _markused(self, name):
        # Record name (and its pickle-cache companion) as still needed.
        self.used.add(name)
        self.used.add(name + _xpickle.EXTENSION)

    def _haslist(self, digest):
        # True if the Sources file with this digest is already mirrored.
        return _os.path.exists(self._listname(digest))

    def warn(self, msg):
        """Prints msg when verbose mode is enabled."""
        if self.verbose:
            print(msg)

class Config(object):
    """Mirror configuration loaded from a JSON file.

    The file must contain a "repositories" object (name -> URL) and a
    "distributions" object; each distribution may reference repositories
    through "members" and claim a release codename via "release"."""

    def __init__(self, config, root):
        """config: path of the JSON configuration file
        root: local directory for the repository mirror"""
        # open() replaces the Python-2-only file() builtin.
        with open(config) as f:
            self.config = _cjson.decode(f.read())
        self.repositories = self.config["repositories"]
        self.distributions = self.config["distributions"]
        self.releases = {}    # release codename -> distribution key

        self.collection = RepoCollection(root)
        for k, v in self.repositories.items():
            self.collection.add(k, v)

        # Validate cross-references up front, before any downloads happen.
        for d, dobj in self.distributions.items():
            for m, mobj in dobj.get("members", {}).items():
                for mem in mobj:
                    if mem not in self.repositories:
                        raise ValueError(
                            "distributions[%r][%r] (%r) not a valid repository"
                            % (d, m, mem))
            if "release" in dobj:
                rel = dobj["release"]
                if rel in self.releases:
                    raise ValueError(
                        "distributions[%r] is duplicate of %r (previous was %r)"
                        % (d, rel, self.releases[rel]))
                self.releases[rel] = d

        self._filemap_cache = None

    def update(self):
        """Refreshes the mirror and invalidates the file map cache."""
        self.collection.update()
        self._filemap_cache = None

    def filemap(self):
        """Returns the loaded file map of the collection, cached until
        the next update()."""
        if self._filemap_cache is None:
            self._filemap_cache = self.collection.filemap(load=True)
        return self._filemap_cache

    def releasepackageversions(self):
        """Returns dictionaries mapping release codenames to packages
        to a set of versions."""
        fm = self.filemap()
        r = {}
        for d, dobj in self.distributions.items():
            pkgver = {}
            for mobj in dobj.get("members", {}).values():
                for mem in mobj:
                    for comps in fm[mem].values():
                        for src in comps.values():
                            # setdefault replaces the manual in/else dance.
                            pkgver.setdefault(src.name, set()).add(src.version)
            r[d] = pkgver
        return r

© 2014-2024 Faster IT GmbH | imprint | privacy policy