#!/usr/bin/python
"""Sort paragraphs in a text file according to a key defined by some
regular expression.

I built this so I could sort the books I wanted to get according to
which aisle they were shelved in.

Usage: aislesort.py FILE [REGEXP]

The text captured by group 1 of REGEXP is the sort key, so REGEXP is
expected to contain at least one capturing group.  Keys are compared as
strings (so "10" sorts before "9").  Paragraphs that yield no key are
listed first.
"""
import re
import sys

# Key pattern used when no REGEXP is given on the command line.
DEFAULT_REGEXP = r'isle (\d+)'


def paragraphs(afile):
    """Iterate over the paragraphs in a text file.

    A paragraph is a run of consecutive lines that are not blank.  Lines
    that are empty or contain only whitespace separate paragraphs and are
    not part of any paragraph.  Each paragraph is yielded as one string
    made of its lines exactly as read, line endings included.

    afile may be any iterable of lines, such as an open file.
    """
    lines = []
    for line in afile:
        if line.strip():
            lines.append(line)
        elif lines:
            # A blank line closes the paragraph collected so far.
            yield ''.join(lines)
            lines = []
    if lines:
        # The input ended without a trailing blank line.
        yield ''.join(lines)


def get_aisle(regexp):
    """Return a function that extracts the sort key from a paragraph.

    The returned function searches its argument for regexp and returns
    the text of group 1 of the first match, or None when there is no
    match.
    """
    # Compile once here instead of looking the pattern up per paragraph.
    pattern = re.compile(regexp)

    def _(para):
        mo = pattern.search(para)
        if mo:
            return mo.group(1)
        return None
    return _


def sort_paragraphs(paras, regexp):
    """Return a new list of paras ordered by the key regexp extracts.

    paras may be any iterable of paragraph strings.  Paragraphs without
    a key come first; the rest follow in string order of their keys.
    The sort is stable, so paragraphs with equal keys keep their
    relative order.
    """
    extract = get_aisle(regexp)

    def sort_key(para):
        key = extract(para)
        # Python 3 refuses to order None against str.  The leading flag
        # makes the "no key sorts first" rule explicit instead of relying
        # on Python 2's None-before-everything ordering.
        if key is None:
            return (False, '')
        return (True, key)

    return sorted(paras, key=sort_key)


def doit(infile, regexp):
    """Print the paragraphs of infile, sorted by regexp, to stdout.

    infile is an iterable of lines.  The sorted paragraphs are joined
    with a newline between them and written with a single print.
    """
    # Parenthesised single-argument print behaves the same on Python 2
    # (print statement) and Python 3 (print function).
    print('\n'.join(sort_paragraphs(paragraphs(infile), regexp)))


def main(argv=None):
    """Run the command line; return the process exit status.

    argv defaults to sys.argv.  Returns 2 after writing a usage message
    to stderr when the FILE argument is missing, 0 otherwise.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'aislesort.py'
        sys.stderr.write('usage: %s FILE [REGEXP]\n' % prog)
        return 2

    # A missing or empty REGEXP argument falls back to the default.
    regexp = DEFAULT_REGEXP
    if len(argv) > 2 and argv[2]:
        regexp = argv[2]

    # The with-block closes the input file even if sorting fails.
    with open(argv[1]) as infile:
        doit(infile, regexp)
    return 0


if __name__ == '__main__':
    sys.exit(main())

# Example invocations:
# ./aislesort.py ~/sdc1/kragen-pim/books 'by \w+ (\w+)'
# ./aislesort.py ~/sdc1/kragen-pim/books '_([\w ]+)_'
# ./aislesort.py ~/sdc1/kragen-pim/books '_(?:The )?([\w ]+)_'
# ./aislesort.py ~/sdc1/kragen-pim/books 'Recommended by (\w+)'
# ./aislesort.py ~/sdc1/kragen-pim/books '(\d\d\d\d-\d\d-\d\d)'
# ./aislesort.py ~/sdc1/kragen-pim/books '(\d+) pages'
# ./aislesort.py ~/sdc1/kragen-pim/books "(?i)(harpercollins|o'reilly|princeton|mcgraw-hill|sheffield hallam university press)"
# ./aislesort.py ~/sdc1/kragen-pim/books "ISBN\s+([-\d]+)"