On 27/12/17 23:42, Paul Wise wrote:
On Thu, Dec 28, 2017 at 5:41 AM, peter green wrote:
Unfortunately there doesn't seem to be a good way to securely retrieve a dsc
from snapshot.debian.org given a package name and version number.
At this time there isn't any good way to do that securely, until
#763419 gets implemented.
That may help a little, though it raises questions of its own, like
* what keys would be used to sign these re-signed release files? You wouldn't
want to use a regular Debian archive key because you wouldn't want people to be
able to use snapshots to attack Debian users.
* How secure would the re-signing infrastructure be?
And it would only solve one aspect of the problem, the fact that verifying
Release signatures may involve old keys. It wouldn't solve the issue of how to
find that damn Release/Sources pair in the first place.
I have attached my attempt at a tool for downloading source packages securely
from snapshot.debian.org. It seems to work, comments/improvements welcome.
#!/usr/bin/python3
import sys
import urllib.request
import urllib.error
import json
from bs4 import BeautifulSoup
import re
import deb822
import io
import gzip
import bz2
import lzma
import os
import subprocess
import hashlib
import argparse
# Command line handling: the source package name and version to retrieve,
# plus two switches controlling evidence retention and acceptance of
# 1024 bit keys.
parser = argparse.ArgumentParser(
    description=(
        "retrieve a source package from snapshot.debian.org with gpg verification\n"
        "the source package will be stored in the current directory\n"
        "in the process of verification files source_version_Release and "
        "source_version_Release.gpg will be created in the current directory, "
        "these will be removed unless --keepevidence is specified"
    )
)
parser.add_argument("source", help="source package name")
parser.add_argument("version", help="source package version")
parser.add_argument(
    "--keepevidence",
    help="keep Release.gpg, Release and Sources files as evidence of package integrity",
    action="store_true",
)
parser.add_argument(
    "--1024",
    help="allow 1024 bit keys, this is needed for old packages but may leave "
         "you vulnerable to well-funded attackers",
    action="store_true",
    dest='allow_1024',
)
args = parser.parse_args()

package = args.source
version = args.version
# the keyrings are looked up next to this script
scriptdir = os.path.dirname(os.path.realpath(__file__))
colonpos = version.find(':')

#regex used for checking package name
pnallowed = re.compile(r'[a-z0-9][a-z0-9\-\+\.]+', re.ASCII)
#regex used for checking version number
#this is not a full implementation of Debian version number rules
#just a sanity check for unexpected characters
#NOTE(review): upper case letters are rejected here and by pfnallowed,
#confirm that this is intended.
vallowed = re.compile(r'[a-z0-9\-:\+~\.]+', re.ASCII)
#regex used for checking allowed characters in package filenames
#and a few other things.
pfnallowed = re.compile(r'[a-z0-9\-_:\+~\.]+', re.ASCII)
#regex used for checking allowed characters in timestamp strings
tsallowed = re.compile(r'[A-Z0-9]+', re.ASCII)
#regex used for matching duplicate or ancient (no Release.gpg) distribution
#names
dupain = re.compile(r'(Debian.*|bo.*|buzz.*|hamm.*|potato|rex|slink.*)/', re.ASCII)

# Both values are pasted into URLs and file names below, so refuse to go on
# when either of them fails its sanity check.
if not pnallowed.fullmatch(package):
    print('package name fails to match required format')
    sys.exit(13)
if not vallowed.fullmatch(version):
    print('version number fails to match required format')
    sys.exit(14)

# file names in the archive never carry the epoch
if colonpos >= 0:
    versionnoepoch = version[colonpos+1:]
else:
    versionnoepoch = version

# ask the machine readable interface which files make up this source version
url = 'http://snapshot.debian.org/mr/package/'+package+'/'+version+'/srcfiles?fileinfo=1'
with urllib.request.urlopen(url) as response:
    jsondata = response.read()
jsondecoded = json.loads(jsondata.decode("utf-8"))

# keep every recorded sighting of the .dsc; each one names an archive and a
# snapshot timestamp that can be searched for a signed Release/Sources pair
dscname = package+'_'+versionnoepoch+'.dsc'
instances = []
for info in jsondecoded['fileinfo'].values():
    for instance in info:
        if instance['name'] == dscname:
            instances.append(instance)
def snapshotsubdirlist(url):
    """Return the subdirectory links found on a snapshot.debian.org listing page.

    snapshot.debian.org doesn't seem to provide a mr interface for file
    listings, so the HTML page is screen scraped.  The aim is to only get
    true subdirs, not files or symlinks; these seem to be indicated by a
    trailing / in the link string.  Links with complex urls, which likely
    represent page chrome, are skipped as well.

    url -- address of the directory listing page to fetch.

    Returns a list of link strings, each ending in '/'.  Errors raised by
    urllib.request.urlopen are passed on to the caller.
    """
    with urllib.request.urlopen(url) as response:
        pagedata = response.read()
    soup = BeautifulSoup(pagedata, "lxml")
    subdirpattern = re.compile(r'[a-zA-Z0-9\-]+/', re.ASCII)
    result = []
    for item in soup.find_all('a'):
        # anchors without a href attribute cannot be directory links
        link = item.get('href')
        if link is not None and subdirpattern.fullmatch(link):
            result.append(link)
    return result
def findmostsecurereleasefiles(deb822):
    """Return the file list of a parsed Release file using the best digest.

    Ideally we want sha256 but sometimes that doesn't exist, so the fields
    are tried in the order SHA256, SHA1, MD5SUM.

    deb822 -- mapping holding the parsed Release file (the script passes a
              deb822.Release object).

    Raises KeyError when none of the three fields is present.
    """
    for field in ('SHA256', 'SHA1'):
        if field in deb822:
            return deb822[field]
    return deb822['MD5SUM']
def findmostsecurespfiles(deb822):
    """Return the file list of a Sources entry using the best digest.

    The fields are tried in the order Checksums-Sha256, Checksums-Sha1,
    Files.

    deb822 -- mapping holding one parsed Sources paragraph (the script
              passes an entry from deb822.Sources.iter_paragraphs).

    Raises KeyError when none of the three fields is present.
    """
    for field in ('Checksums-Sha256', 'Checksums-Sha1'):
        if field in deb822:
            return deb822[field]
    return deb822['Files']
# Search phase: walk every snapshot in which the .dsc was seen, open each
# distribution's Release file and the Sources files it lists, and stop at the
# first Sources paragraph describing the wanted package and version.  The
# result is left in finalentry (None when nothing was found).

# preference order of the Sources variants listed in a Release file:
# 0=none 1=gz 2=bz2 3=xz, the highest number available wins
sourceslevels = {'Sources': 0, 'Sources.gz': 1, 'Sources.bz2': 2, 'Sources.xz': 3}
compressionsuffixes = {0: '', 1: '.gz', 2: '.bz2', 3: '.xz'}
decompressors = {1: gzip.open, 2: bz2.open, 3: lzma.open}

finalentry = None
for instance in instances:
    if finalentry is not None:
        break
    # both values end up in URLs, so check them first
    if not pfnallowed.fullmatch(instance['archive_name']):
        print("archive name contains unexpected characters")
        sys.exit(10)
    if not tsallowed.fullmatch(instance['first_seen']):
        print("first seen contains unexpected characters")
        sys.exit(11)
    snapshoturl = ('http://snapshot.debian.org/archive/'+instance['archive_name']
                   +'/'+instance['first_seen']+'/')
    if instance['archive_name'] == 'debian-archive':
        # debian-archive holds several archives side by side, one per subdir
        print('searching '+snapshoturl)
        distsurls = []
        for subdir in snapshotsubdirlist(snapshoturl):
            # strip trailing / to get the archive name
            distsurls.append((snapshoturl+subdir+'dists/', subdir[:-1]))
    else:
        distsurls = [(snapshoturl+'dists/', instance['archive_name'])]
    for (distsurl, aname) in distsurls:
        if finalentry is not None:
            break
        print('searching '+distsurl)
        for link in snapshotsubdirlist(distsurl):
            if finalentry is not None:
                break
            #regular potato archive doesn't have Release.gpg but
            #potato security archive does
            if dupain.fullmatch(link) and ((aname != 'debian-security') or (link != 'potato/')):
                print('ignoring ancient or duplicate distribution '+link)
                continue
            if aname == 'debian-security':
                link += 'updates/'
            # links always carry a trailing /, so the comparison has to as well;
            # without it the exception for potato-proposed-updates never applied
            elif (aname == 'debian-non-US') and (link != 'potato-proposed-updates/'):
                link += 'non-US/'
            url = distsurl + link
            releaseurl = url + 'Release'
            print('searching '+releaseurl+' aname='+aname)
            try:
                with urllib.request.urlopen(releaseurl) as response:
                    releasedata = response.read()
            except urllib.error.URLError:
                print('WARNING: failed to fetch '+releaseurl+' continueing search')
                continue
            release = deb822.Release(releasedata)
            # map component name ('' for a flat layout) to the best Sources
            # variant the Release file offers for it
            components = {}
            for fileinfo in findmostsecurereleasefiles(release):
                pathparts = fileinfo['name'].split('/')
                cl = sourceslevels.get(pathparts[-1], -1)
                if cl < 0:
                    continue
                component = pathparts[0] if len(pathparts) > 1 else ''
                if (component != '') and (not pfnallowed.fullmatch(component)):
                    print('component name contains unexpected characters')
                    sys.exit(12)
                components[component] = max(cl, components.get(component, cl))
            for component, cl in components.items():
                if finalentry is not None:
                    break
                if component != '':
                    pn = component+'/source/Sources'+compressionsuffixes[cl]
                else:
                    pn = 'Sources'+compressionsuffixes[cl]
                sourcesurl = url+pn
                # a Sources file that cannot be fetched is skipped in the same
                # way as a missing Release file instead of ending the search
                try:
                    with urllib.request.urlopen(sourcesurl) as response:
                        sourcesraw = response.read()
                except urllib.error.URLError:
                    print('WARNING: failed to fetch '+sourcesurl+' continueing search')
                    continue
                if cl == 0:
                    sourcesdata = sourcesraw
                else:
                    with decompressors[cl](io.BytesIO(sourcesraw)) as sourcesf:
                        sourcesdata = sourcesf.read()
                for entry in deb822.Sources.iter_paragraphs(sourcesdata):
                    if (entry['Package'] == package) and (entry['Version'] == version):
                        print('found required entry in '+sourcesurl)
                        finalentry = (releaseurl, releasedata, sourcesdata,
                                      instance['first_seen'], instance['archive_name'],
                                      entry['Directory'], findmostsecurespfiles(entry))
                        break
#search complete, now on to the verification
# Verification phase: check the gpg signature on the Release file, check that
# the Sources file is the one listed in that Release file, then check (or
# download and check) every file of the source package against the Sources
# entry.  Exit codes: 1 nothing found, 2 bad signature, 3 Sources/Release
# mismatch, 4/5 suspicious file name, 6/9 bad download, 7/8 bad existing file.

def _pickdigest(entry):
    """Return (field name, new hashlib object) for the best digest in entry."""
    if 'sha256' in entry:
        return 'sha256', hashlib.sha256()
    if 'sha1' in entry:
        return 'sha1', hashlib.sha1()
    return 'md5sum', hashlib.md5()


if finalentry is None:
    print('unable to locate Sources file containing package, most likely it belongs to a very old distribution that does not provide Release.gpg')
    sys.exit(1)

(releaseurl, releasedata, sourcesdata, seents, archivename, directory, files) = finalentry
gpgurl = releaseurl+'.gpg'
print(gpgurl)
with urllib.request.urlopen(gpgurl) as response:
    gpgdata = response.read()

releasegpgname = package+'_'+version+'_Release.gpg'
releasename = package+'_'+version+'_Release'
try:
    # the files have to be closed (and so flushed to disk) before gpgv is
    # started, otherwise it may read truncated data
    with open(releasegpgname, 'wb') as f:
        f.write(gpgdata)
    with open(releasename, 'wb') as f:
        f.write(releasedata)

    #first verify the gpg signature on release file
    command = ['gpgv', '--keyring', scriptdir+'/snapshotsecure.gpg']
    if args.allow_1024:
        command += ['--keyring', scriptdir+'/snapshotsecure-1024.gpg']
    command += [releasegpgname, releasename]
    if subprocess.call(command) != 0:
        print('gpg validation failed')
        sys.exit(2)

    #next verify that the Sources file matches the release file.
    # parse the exact bytes whose signature was just checked
    release = deb822.Release(releasedata)
    releasefiles = findmostsecurereleasefiles(release)
    foundsources = False
    if releasefiles:
        hashalg, m = _pickdigest(releasefiles[0])
        m.update(sourcesdata)
        sourceshash = m.hexdigest()
        print('sources file has hash '+sourceshash)
        # sourcesdata is uncompressed, so only plain 'Sources' lines can match
        for releasefile in releasefiles:
            fn = releasefile['name'].split('/')[-1]
            if (fn == 'Sources') and (sourceshash == releasefile[hashalg]):
                foundsources = True
    if foundsources:
        print('successfully matched sources file to release file')
    else:
        print('failed to match Sources file to Release file')
        sys.exit(3)
    if args.keepevidence:
        with open(package+'_'+version+'_Sources', 'wb') as f:
            f.write(sourcesdata)

    for fileentry in files:
        hashalg, m = _pickdigest(fileentry)
        filehash = fileentry[hashalg]
        filesize = int(fileentry['size'])
        filename = fileentry['name']
        #sanity check, filename must begin with the package name
        if not filename.startswith(package):
            print('filename in source package does not begin with source package name')
            sys.exit(4)
        #sanity check, filename shouldn't contain any unwanted characters
        if not pfnallowed.fullmatch(filename):
            print('filename in source package contains unwanted characters')
            sys.exit(5)
        if os.path.exists(filename):
            with open(filename, 'rb') as f:
                filedata = f.read()
            m.update(filedata)
            if m.hexdigest() != filehash:
                print("hash sum mismatch when verifying existing file "+filename)
                sys.exit(7)
            if len(filedata) != filesize:
                print("fize mismatch when verifying existing file "+filename)
                sys.exit(8)
            print("verified existing file "+filename)
        else:
            #file does not exist, download it
            fileurl = 'http://snapshot.debian.org/archive/'+archivename+'/'+seents+'/'+directory+'/'+filename
            with urllib.request.urlopen(fileurl) as response:
                filedata = response.read()
            m.update(filedata)
            if m.hexdigest() != filehash:
                print("hash sum mismatch when downloading "+filename)
                sys.exit(6)
            if len(filedata) != filesize:
                print("fize mismatch when downloading "+filename)
                sys.exit(9)
            # only written to disk after both checks have passed
            with open(filename, 'wb') as f:
                f.write(filedata)
            print("successfully downloaded "+filename)
finally:
    # the Release files are only kept on request, whether verification
    # succeeded or ended in one of the sys.exit calls above
    if not args.keepevidence:
        for evidencename in (releasegpgname, releasename):
            if os.path.exists(evidencename):
                os.remove(evidencename)
#!/bin/bash
# Build the two keyrings used by the download script:
#   snapshotsecure.gpg      - current Debian archive keys plus the removed
#                             keys that are not 1024 bit
#   snapshotsecure-1024.gpg - the removed 1024 bit keys, only consulted when
#                             the download script is run with --1024
cp /usr/share/keyrings/debian-archive-keyring.gpg snapshotsecure.gpg
RMKEYS=/usr/share/keyrings/debian-archive-removed-keys.gpg

# The command substitutions are left unquoted on purpose: every key id has to
# become a separate argument to --export.
# [^:]* (rather than [^:]) also matches an empty second field, so such a key
# cannot slip past the 1024 bit filter.
gpg --keyring "$RMKEYS" --no-default-keyring --export \
    $(gpg --keyring "$RMKEYS" --no-default-keyring --list-keys --with-colons \
        | grep '^pub:[^:]*:1024:' | cut -d ':' -f 5) > snapshotsecure-1024.gpg

gpg --keyring "$RMKEYS" --no-default-keyring --export \
    $(gpg --keyring "$RMKEYS" --no-default-keyring --list-keys --with-colons \
        | grep '^pub:' | grep -v '^pub:[^:]*:1024:' | cut -d ':' -f 5) >> snapshotsecure.gpg
_______________________________________________
vcs-pkg-discuss mailing list
vcs-pkg-discuss@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/vcs-pkg-discuss