Lease files provided on disk can be huge, as we have seen in large deployments (at their repair centers). Reading and parsing them in memory is extraordinarily expensive. Rough tests show that the RAM used by a parsed leases file is about 5x the file's size.
So for 'format 1' JSON lease files we use the greplease.grep() function, which searches for the key->val combo in the leases file and returns the value -- which is then handled by the 'singleton lease' codeflow in find_leases(). If the lease file is a singleton lease, then we read it straight away. The greplease code has the option of using mmap if available. If we get mmap in future initrds (~20KB) we can drop the read()-based version, which is a tad convoluted. --- This is a needed fix, but perhaps controversial. It tests well, but sure needs review. I have uploaded some sample large-ish sigfiles (padded with various things) on http://dev.laptop.org/~martin/ - use 'cat-leases' to add your own leases to them for testing. According to Uruguay, their lease.sig was >100MB and XOs would die trying to allocate ~500 MB for the parsed data structure. --- src/activate.py | 40 +++++++++++---- src/greplease.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 11 deletions(-) create mode 100755 src/greplease.py diff --git a/src/activate.py b/src/activate.py index bac8769..fa0447f 100644 --- a/src/activate.py +++ b/src/activate.py @@ -11,13 +11,31 @@ from subprocess import check_call, call from binascii import hexlify sys.path += [ '/act-gui' ] # gui_client is in a subdir from gui_client import send +import greplease -def try_blk(device, mnt, fstype='msdos'): +def lease_from_file(fname, serial_num): + """Find the appropriate lease in a file that may be + a bare lease ("singleton") or a -- perhaps huge -- + CJSON file. 
+ """ + fh = open(fname, 'r') + head = fh.read(5) + fh.close() + if head == '[1,{"': + # matches the start of a well-formed v1 leases file + # we use grep here to handle possibly huge lease files + # (in use at large deployments' repair centers) + return greplease.grep(fname, serial_num) + fh = open(fname, 'r') + fc = fh.read() + fh.close() + return fc + +def try_blk(device, mnt, serial_num, fstype='msdos'): """Try to mount a block device and read keylist from it.""" try: with blk_mounted(device, mnt, fstype): - with open(os.path.join(mnt,'lease.sig')) as f: - return f.read() + return lease_from_file(os.path.join(mnt,'lease.sig'), serial_num) except: return None @@ -328,9 +346,9 @@ def activate (serial_num, uuid): # check SD card. ##################### send('SD start') sd_init() - keylist = try_blk('/dev/mmcblk0p1', SD_MNT) + keylist = try_blk('/dev/mmcblk0p1', SD_MNT, serial_num) if not keylist: - keylist = try_blk('/dev/mmcblk0', SD_MNT) # unpartitioned SD card + keylist = try_blk('/dev/mmcblk0', SD_MNT, serial_num) # unpartitioned SD card if keylist: send('SD success') try: @@ -344,12 +362,12 @@ def activate (serial_num, uuid): # Check USB stick #################### send('USB start') usb_init() - if not keylist: - for suf in ['a1','a','b1','b','c1','c','b1','b','a1','a']: - keylist = try_blk('/dev/sd'+suf, USB_MNT) - if keylist: break - # some USB keys take a while to come up - time.sleep(1) + keylist = None + for suf in ['a1','a','b1','b','c1','c','b1','b','a1','a']: + keylist = try_blk('/dev/sd'+suf, USB_MNT, serial_num) + if keylist: break + # some USB keys take a while to come up + time.sleep(1) if keylist: send('USB success') try: diff --git a/src/greplease.py b/src/greplease.py new file mode 100755 index 0000000..6a1ef95 --- /dev/null +++ b/src/greplease.py @@ -0,0 +1,149 @@ +#!/usr/bin/python + +import re + +def grep_for_lease_mmap(fpath, sn): + """Search a potentially larger-than-mem cjson file for + something that looks like a lease or a series of 
leases. + + Uses mmap. + + returns a string or False + """ + import mmap + fh = open(fpath, 'r') + m = mmap.mmap(fh.fileno(), 0, mmap.MAP_SHARED, mmap.PROT_READ) + + # find the start of it + rx = re.compile('"'+sn+'":"') + objkey = rx.search(m) + + if objkey: + # find the tail - the first non-escaped + # doublequotes. This relies on sigs not + # having escape chars themselves. + # TODO: Negative look-behind assertion to handle + # escaped values. + rx = re.compile('"') + objend = rx.search(m, objkey.end()) + + if objkey and objend: + found = m[objkey.end():objend.start()] + else: + found = False + + m.close() + fh.close() + + return found + +def grep_for_lease_read(fpath, sn): + """Search a potentially larger-than-mem cjson file for + something that looks like a lease or a series of leases. + + Uses old read()s + + returns a string or False + """ + # Use read()s, but keep stuff aligned to 4KB pages + # so we stand a chance to hit the fast paths. + page = 4096 #* 1024 + step = 0 + cursor = 0 + + needle = '"'+sn+'":"' + needlerx = re.compile(needle) + needlelength = len(needle) + + fh = open(fpath, 'r') + + buf = '' + buftail = '' + + while True: + + buf = fh.read(page) + if (buf == ''): # EOF + break + + buf = buftail + buf + + objkey = needlerx.search(buf) + if objkey: + # found the needle - issue a read + # from here and break + fh.seek( page * step + objkey.start() - len(buftail)) + buf = fh.read(page) + # re-search for objkey - to get the offsets right + objkey = needlerx.search(buf) + break + + # prep for next read - keep tail + # in case needle is on the boundary + buftail = buf[-needlelength:] + step = step+1 + fh.seek( page * step ) + #print " [ Seek to %s ]" % (page * step) + + if objkey: + # find the tail - the first non-escaped + # doublequotes. This relies on sigs not + # having escape chars themselves. + # TODO: Negative look-behind assertion to handle + # escaped values. 
+ rx = re.compile('"') + objend = rx.search(buf, objkey.end()) + + if objkey and objend: + found = buf[objkey.end():objend.start()] + else: + found = False + + fh.close() + + return found + +def grep(fpath, sn): + + hasmmap = True + try: + import mmap + except: + hasmmap = False + + if hasmmap: + return grep_for_lease_mmap(fpath, sn) + else: + return grep_for_lease_read(fpath, sn) + + +## sample test - work through a cjson file +## based on the 'words' dict file, in reverse. +## each word is key and value, with the value +## having its capitalisation reversed. +# import sys +# fh = open(sys.argv[1]) +# bigdata = {} +# lines = fh.readlines() +# lines.reverse() +# for k in lines: +# k = k.strip() +# print "Looking for %s" % k +# found = grep(sys.argv[2], k) +# if found: +# if found == k.swapcase(): +# print "... found good match" +# else: +# print "BAD MATCH %s" % found +# else: +# print "NO MATCH" + +## Another sample test - args: filename, SN +#import sys +#found = grep(sys.argv[1], sys.argv[2]) +# +#if found: +# print "Found:" + found +#else: +# print 'not found' + -- 1.6.0.6 _______________________________________________ Devel mailing list Devel@lists.laptop.org http://lists.laptop.org/listinfo/devel