I apologize for posting to the list, but since 14 Jan 2006 I've had no luck sending the following reply via email:
Tor Olav Stava wrote this on Mon, Jan 09, 2006 at 10:30:59AM +0100. My reply is below. > I'll be happy to test any code on my systems. Sorry for the delay. Attached, please find a replacement for HarvestPackages.py. It takes a long time to run, but doesn't do a lot of Internet access. If you find it downloading stuff, you probably need to install the *xml docbook*. HarvestPackages.py produces a copy of pkgs.dat on standard output, so you want to redirect output to that file. It finds 369 packages in the BLFS BOOK. The AuditPackages.py script needs to be changed to "fix" packages that have alternate dependencies. This is not done yet. Please let me know how HarvestPackages.py works for you now. -- .. Chuck Rhode, Sheboygan, WI, USA .. 30°F. Wind NNW 18 mph. Cloudy.
#!/usr/bin/python

# HarvestPackages.py

# 2004 FEB 28 . ccr

# Extract a database of package names, source archives, dependencies,
# and installation procedures from the xml version of the Beyond
# Linux from Scratch book.

# 2006 JAN 09 . ccr . Migrate to BLFS 6.1.  Switch from xml.sax to
#             .     . libxml2.

import sys
import os
import optparse
import PackageDB
import libxml2

ZERO=0
SPACE=' '
NULL=''
NUL='\x00'
NA=-1


class cAttributes(dict):
    """Dictionary exposing the SAX-style attribute accessor names."""

    def getNames(self):
        """Return the attribute names (the dictionary keys)."""
        return self.keys()

    def getValue(self,aKey,aDefault=None):
        """Return the value stored under aKey, or aDefault when absent."""
        return self.get(aKey,aDefault)


class cDocHandler(object):
    """SAX-like event dispatcher driven by a libxml2 text reader.

    ReadLoop() walks the reader node by node and calls the handler
    methods below.  Every handler in this base class is a no-op;
    subclasses override the ones they care about.
    """

    def __init__(self,aReader,aParseEntities=False):
        # aParseEntities is accepted for interface compatibility; it is
        # not used by this class.
        self.fReader=aReader
        return

    def startDocument(self):
        """Hook: a document node was read."""
        return

    def endDocument(self):
        """Hook: end of document.  ReadLoop() does not call this."""
        return

    def startElement(self,aName,aAttrs):
        """Hook: element aName opened; aAttrs is a dict or None."""
        return

    def endElement(self,aName,aIsEmpty=False):
        """Hook: element aName closed; aIsEmpty is set for <empty/> elements."""
        return

    def characters(self,aContent):
        """Hook: text, CDATA or significant whitespace was read."""
        return

    def ignorableWhitespace(self):
        """Hook: an insignificant whitespace node was read."""
        return

    def processingInstruction(self,aTarget,aData):
        """Hook: a processing instruction was read."""
        return

    def skippedEntity(self,aName):
        """Hook: an unexpanded entity reference was read."""
        return

    def Read(self):
        """Advance the underlying reader by one node and return its status."""
        return self.fReader.Read()

    def ReadLoop(self):
        """Read the whole document, dispatching one handler call per node.

        An empty element produces startElement() immediately followed by
        endElement(..., True).  Node types not listed below (comments,
        doctype, notations, XML declaration, ...) are ignored.

        Exits with 'halt' if the reader reports an error, so that a
        partially harvested document is never mistaken for a complete one.
        """
        __TextTypes=(
            libxml2.XML_READER_TYPE_TEXT,
            libxml2.XML_READER_TYPE_CDATA,
            libxml2.XML_READER_TYPE_SIGNIFICANT_WHITESPACE,
            )
        __Status=self.Read()
        while __Status==1:
            # Capture everything about the current node *before* walking
            # its attributes: MoveToNextAttribute() repositions the reader.
            __Type=self.fReader.NodeType()
            __Name=self.fReader.Name()
            __IsEmpty=self.fReader.IsEmptyElement()
            if self.fReader.HasValue():
                __Text=self.fReader.Value()
            else:
                __Text=None
            if self.fReader.HasAttributes():
                __Attrs={}
                while self.fReader.MoveToNextAttribute():
                    __Attrs[self.fReader.Name()]=self.fReader.Value()
            else:
                __Attrs=None
            if __Type==libxml2.XML_READER_TYPE_ELEMENT:
                self.startElement(__Name,__Attrs)
                if __IsEmpty:
                    self.endElement(__Name,__IsEmpty)
            elif __Type==libxml2.XML_READER_TYPE_END_ELEMENT:
                self.endElement(__Name)
            elif __Type in __TextTypes:
                self.characters(__Text)
            elif __Type==libxml2.XML_READER_TYPE_ENTITY_REFERENCE:
                self.skippedEntity(__Name)
            elif __Type==libxml2.XML_READER_TYPE_PROCESSING_INSTRUCTION:
                self.processingInstruction(__Name,__Text)
            elif __Type==libxml2.XML_READER_TYPE_DOCUMENT:
                self.startDocument()
            elif __Type==libxml2.XML_READER_TYPE_WHITESPACE:
                self.ignorableWhitespace()
            __Status=self.Read()
        if __Status<ZERO:
            # libxml2's xmlTextReaderRead returns 1 per node, 0 at the end
            # of the document and -1 on error.  Treating -1 like 0 would
            # silently store a truncated package database.
            sys.stderr.write('''
XML reader reported an error. Document only partially processed.
''')
            sys.exit('halt')
        return


class cStateStack(list):
    """Stack of the names of the currently open XML elements."""

    def __init__(self):
        list.__init__(self)
        # Name of the most recently popped *empty* element, remembered so
        # that a redundant endElement for it can be tolerated once.
        self.fPoppedItem=None
        return

    def Push(self,aItem):
        """Push element name aItem onto the stack."""
        self.append(aItem)
        return

    def Pop(self,aItem,aEmptyElt=False):
        """Pop aItem, which must be the innermost open element.

        The top of the stack may carry a suffix after aItem (for example
        'bridgehead' closes 'bridgeheadsect3'), hence the prefix match.
        Returns the popped name, or None when aItem is a repeated close
        of the empty element popped just before.  Exits with 'halt' if
        aItem does not match the top of the stack.
        """
        if self.IsMostRecently(['%s*'%aItem],aIgnoreCase=False):
            __Result=self.pop()
            if aEmptyElt:
                self.fPoppedItem=__Result
            else:
                self.fPoppedItem=None
            return __Result
        if aItem==self.fPoppedItem:
            # We've inexplicably received an endElement for an empty element.
            self.fPoppedItem=None
            return None
        # Diagnostics go to stderr: stdout may be the package database.
        sys.stderr.write('''
State stack corrupted. %s closing but not most recent. Stack unchanged.
'''%aItem)
        sys.stderr.write('%s\n'%(list(self),))
        sys.exit('halt')

    def GetDepth(self):
        """Return the number of open elements."""
        return len(self)

    def IsMostRecently(self,aList,aIgnoreCase=True):
        """Return True if the top of the stack matches the pattern aList.

        aList names the innermost elements, outermost first.  A pattern
        entry containing '*' matches any stack entry sharing the text
        that precedes the '*'.  A pattern deeper than the stack never
        matches; an empty pattern always matches.
        """

        def Test(aDepth):
            if aIgnoreCase:
                __EltList=aList[-aDepth].lower()
                __EltStack=self[-aDepth].lower()
            else:
                __EltList=aList[-aDepth]
                __EltStack=self[-aDepth]
            if __EltList==__EltStack:
                return True
            __Pos=__EltList.find('*')
            if __Pos==NA:
                return False
            return __EltList[:__Pos]==__EltStack[:__Pos]

        __Depth=len(aList)
        if __Depth>self.GetDepth():
            return False
        while __Depth>ZERO:
            if not Test(__Depth):
                return False
            __Depth-=1
        return True


class cBook(cDocHandler):
    """Harvest package records from the book into a package list.

    Element events are tracked on a state stack; the shape of the top of
    that stack, together with the most recent title text, decides which
    piece of package data the current element contributes.
    """

    def __init__(self,aReader,aPackageList):
        cDocHandler.__init__(self,aReader)
        self.fPackageList=aPackageList
        self.fStateStack=cStateStack()
        self.fBuffer=NULL
        self.fPackageName=NULL
        self.fPackageVersion=NULL
        # Most recent title text.  Initialised here because the handlers
        # consult it for every para/ulink/xref, including any that occur
        # before the first title in the document.
        self.fTitle=NULL
        self.fURL=NULL
        self.fExternal=NULL
        self.fPackage=None
        self.fCapture=False
        return

    def startElement(self,aName,aAttrs):
        self.ProcessElement(aName,'init',aAttrs=aAttrs)
        return

    def endElement(self,aName,aIsEmpty=False):
        self.ProcessElement(aName,'term',aIsEmpty=aIsEmpty)
        return

    def characters(self,aContent):
        # Text is accumulated only while a handler has capture switched on.
        if self.fCapture:
            self.fBuffer+=aContent
        return

    def ProcessElement(self,aName,aFunction,aAttrs=None,aIsEmpty=False):
        """Handle the opening ('init') or closing ('term') of element aName.

        On 'init' the element is pushed before dispatch; on 'term' it is
        popped after dispatch, so the handlers always see the element on
        top of the state stack.  aAttrs is the attribute dict (or None);
        aIsEmpty marks the close of an empty element.
        """

        def Buffer(aIsInit):
            # Start capturing into a fresh buffer on open; stop on close.
            self.fCapture=aIsInit
            if self.fCapture:
                self.fBuffer=NULL
            return

        def InDependencySection():
            # True while under a "Required ..." or "Recommended ..." title.
            __Title=self.fTitle.lower()
            return (__Title.startswith('required') or
                    __Title.startswith('recommended'))

        def PushElement(aIsInit,aName):
            if aIsInit:
                if aName in ['bridgehead']:
                    # e.g. 'bridgehead' + 'sect3' -> 'bridgeheadsect3'
                    aName+=__SafeAttrs.get('renderas',NULL)
                self.fStateStack.Push(aName)
            return

        def PopElement(aIsInit,aName,aIsEmpty):
            if not aIsInit:
                self.fStateStack.Pop(aName,aIsEmpty)
            return

        def ProcessSect1(aIsInit):
            # A sect1 delimits one package: note its identity on open,
            # file the harvested package (if any) on close.
            if aIsInit:
                self.fPackageName=__SafeAttrs.get('id')
                self.fPackageVersion=__SafeAttrs.get('xreflabel')
            elif self.fPackage is not None:
                self.fPackageList.Append(self.fPackage)
                self.fPackage=None
            return

        def ProcessTitle(aIsInit):
            Buffer(aIsInit)
            if not aIsInit:
                self.fTitle=self.fBuffer
                # Fall back to the title when sect1 had no xreflabel.
                if self.fPackageVersion in [NULL,None]:
                    self.fPackageVersion=self.fTitle
            return

        def ProcessPackage(aIsInit):
            # A "Package Information" heading starts a new package record.
            if aIsInit:
                return
            if not self.fTitle.lower().startswith('package information'):
                return
            if self.fPackage is None:
                self.fPackage=PackageDB.cPackage()
                self.fPackage.SetNameVersion(self.fPackageName,
                                             self.fPackageVersion)
                if OPTS.Verbose:
                    sys.stderr.write('%s\n'%self.fPackage.GetNameVersion())
            return

        def ProcessURL(aIsInit):
            if aIsInit:
                # A ulink without a url attribute yields None, not a crash.
                __URL=__SafeAttrs.get('url')
                if __URL is None:
                    self.fURL=None
                else:
                    self.fURL=__URL.strip()
                return
            if self.fPackage is None:
                return
            if self.fURL in [NULL,None]:
                # Nothing usable to record as an archive or a patch.
                return
            __Title=self.fTitle.lower()
            if __Title.startswith('package information'):
                self.fPackage.fArchiveList=[self.fURL]
            elif __Title.startswith('additional download'):
                self.fPackage.AppendPatch(self.fURL)
            return

        def ProcessConjunction(aIsInit):
            # Record an 'or' marker when the text of a dependency
            # paragraph contains the word "or".
            if InDependencySection():
                Buffer(aIsInit)
                if not aIsInit:
                    __Tokens=[__Tok.lower() for __Tok in self.fBuffer.split()]
                    if ('or' in __Tokens) and (self.fPackage is not None):
                        self.fPackage.AppendDependency('or')
            return

        def ProcessExternal(aIsInit):
            # xref: a dependency on another section, named by linkend.
            if aIsInit:
                self.fExternal=__SafeAttrs.get('linkend')
            elif InDependencySection() and (self.fPackage is not None):
                self.fPackage.AppendDependency(self.fExternal)
            return

        def ProcessForeign(aIsInit):
            # ulink inside a para: the dependency is the link's text.
            Buffer(aIsInit)
            if not aIsInit:
                if InDependencySection() and (self.fPackage is not None):
                    self.fPackage.AppendDependency(self.fBuffer)
            return

        def ProcessCode(aIsInit):
            # Commands are stored prefixed by the title they appeared under.
            Buffer(aIsInit)
            if not aIsInit:
                if self.fPackage is not None:
                    self.fPackage.AppendCommand(
                        '# %s\n%s'%(self.fTitle,self.fBuffer))
            return

        __IsInit=aFunction in ['init']
        # Attribute names are matched case-insensitively.
        __SafeAttrs={}
        if aAttrs is not None:
            for (__Key,__Value) in aAttrs.items():
                __SafeAttrs[__Key.lower()]=__Value
        __Stack=self.fStateStack
        PushElement(__IsInit,aName)
        if __Stack.IsMostRecently(['sect1']):
            ProcessSect1(__IsInit)
        elif (__Stack.IsMostRecently(['title']) or
              __Stack.IsMostRecently(['bridgehead*'])):
            ProcessTitle(__IsInit)
            if (__Stack.IsMostRecently(['sect3','title']) or
                __Stack.IsMostRecently(['bridgeheadsect3'])):
                ProcessPackage(__IsInit)
        elif __Stack.IsMostRecently(['itemizedlist','listitem','para','ulink']):
            ProcessURL(__IsInit)
        elif __Stack.IsMostRecently(['para']):
            ProcessConjunction(__IsInit)
        elif __Stack.IsMostRecently(['para','xref']):
            ProcessExternal(__IsInit)
        elif __Stack.IsMostRecently(['para','ulink']):
            ProcessForeign(__IsInit)
        elif __Stack.IsMostRecently(['screen','userinput']):
            ProcessCode(__IsInit)
        PopElement(__IsInit,aName,aIsEmpty)
        return


# Command line.  OPTS is read as a global by cBook (Verbose).

__Parser=optparse.OptionParser()
__DefaultIndexDoc=os.path.expanduser('~/BLFS/BOOK/index.xml')
__Parser.add_option('-I','--IndexDoc',
                    help='Root of the XML document. Default is %s.'
                         % (__DefaultIndexDoc),
                    default=__DefaultIndexDoc)
__Parser.add_option('-P','--PackageDB',
                    help='Output file name to receive package database. Default is > stdout.')
__Parser.add_option('-V','--Verbose',
                    action='store_true',
                    help='List package names during processing.',
                    default=False)
(OPTS,__Args)=__Parser.parse_args()
if len(__Args)>ZERO:
    __Parser.error('Arguments are prohibited.')
if not os.path.exists(OPTS.IndexDoc):
    __Parser.error(OPTS.IndexDoc+' not found.')
if OPTS.PackageDB in [None,NULL,'> stdout']:
    # No output file named: PackageDB.Store receives None.
    OPTS.PackageDB=None

__Flags=(
    libxml2.XML_PARSE_XINCLUDE|  # Expand xincludes.
    libxml2.XML_PARSE_NOENT|     # Expand entities.  I know this doesn't look right.
    libxml2.XML_PARSE_NOBLANKS|  # Suppress whitespace.
    ZERO)
__Book=cBook(libxml2.readerForFile(OPTS.IndexDoc,
                                   'ascii',
                                   __Flags,
                                   ),
             PackageDB.PackageList,
             )
__Book.ReadLoop()
PackageDB.Store(OPTS.PackageDB)

# Fin
-- http://linuxfromscratch.org/mailman/listinfo/blfs-support FAQ: http://www.linuxfromscratch.org/blfs/faq.html Unsubscribe: See the above information page