I apologize for posting to the list, but since 14 Jan 2006 I've had no
luck sending the following reply via email:

Tor Olav Stava wrote this on Mon, Jan 09, 2006 at 10:30:59AM +0100.
My reply is below.

> I'll be happy to test any code on my systems.

Sorry for the delay.  Attached, please find a replacement for
HarvestPackages.py.

It takes a long time to run, but doesn't do a lot of Internet access.
If you find it downloading stuff, you probably need to install the
*xml docbook*.  

HarvestPackages.py produces a copy of pkgs.dat on standard output, so
you want to redirect output to that file.  It finds 369 packages in
the BLFS BOOK.

The AuditPackages.py script needs to be changed to "fix" packages that
have alternate dependencies.  This has not been done yet.

Please let me know how HarvestPackages.py works for you now.

-- 
.. Chuck Rhode, Sheboygan, WI, USA
.. 30°F. Wind NNW 18 mph. Cloudy.
#!/usr/bin/python

# HarvestPackages.py
# 2004 FEB 28 . ccr

# Extract a database of package names, source archives, dependencies,
# and installation procedures from the xml version of the Beyond
# Linux from Scratch book.

# 2006 JAN 09 . ccr . Migrate to BLFS 6.1.  Switch from xml.sax to
#             .     . libxml2.

import sys
import os
import optparse
import PackageDB
import libxml2

# Named constants used in place of bare literals throughout this script.
ZERO=0  # Integer zero; also terminates the parser flag expression below.
SPACE=' '  # A single blank.
NULL=''  # The empty string; initial value of most cBook text fields.
NUL='\x00'  # The NUL character.
NA=-1  # "Not found" result of str.find(), tested in cStateStack.

class cAttributes(dict):
    """A dict of attribute names and values with getNames/getValue accessors."""

    def getNames(self):
        """Return the attribute names, i.e. the keys of this dictionary."""
        __Names=self.keys()
        return __Names

    def getValue(self,aKey,aDefault=None):
        """Return the value stored under aKey, or aDefault when aKey is absent."""
        if aKey in self:
            return self[aKey]
        return aDefault

class cDocHandler(object):
    """Turn the nodes delivered by a pull-style XML reader into callbacks.

    The reader passed to the constructor must offer the methods used by
    ReadLoop: Read, NodeType, Name, Value, HasValue, HasAttributes,
    IsEmptyElement and MoveToNextAttribute.  Every callback defined here
    is a no-op; subclasses override the ones they care about.
    """

    def __init__(self,aReader,aParseEntities=False):
        """Remember the reader.  aParseEntities is accepted but not used."""
        self.fReader=aReader
        return

    def startDocument(self):
        """Called for a document node.  Does nothing by default."""
        return

    def endDocument(self):
        """Hook for the end of the document.  ReadLoop never calls it."""
        return

    def startElement(self,aName,aAttrs):
        """Called for an opening tag; aAttrs is a dict or None."""
        return

    def endElement(self,aName,aIsEmpty=False):
        """Called for a closing tag and, with aIsEmpty set, right after
        the startElement call of an empty element."""
        return

    def characters(self,aContent):
        """Called for text, CDATA and significant-whitespace nodes."""
        return

    def ignorableWhitespace(self):
        """Called for whitespace-only nodes."""
        return

    def processingInstruction(self,aTarget,aData):
        """Called for a processing instruction."""
        return

    def skippedEntity(self,aName):
        """Called for an entity reference that was left unexpanded."""
        return

    def Read(self):
        """Advance the reader by one node and return its status code."""
        return self.fReader.Read()

    def ReadLoop(self):
        """Pump the reader until Read() stops returning 1.

        For each node the type, name, text and attributes are collected
        up front, because walking the attributes moves the reader off
        the element itself.  The matching callback is then invoked.
        """
        while self.Read()==1:
            reader=self.fReader
            kind=reader.NodeType()
            name=reader.Name()
            is_empty=reader.IsEmptyElement()
            text=None
            if reader.HasValue():
                text=reader.Value()
            attrs=None
            if reader.HasAttributes():
                attrs={}
                while reader.MoveToNextAttribute():
                    attr_name=reader.Name()
                    attr_value=reader.Value()
                    attrs[attr_name]=attr_value
            if kind==libxml2.XML_READER_TYPE_ELEMENT:
                self.startElement(name,attrs)
                if is_empty:
                    # No separate end node is expected for <empty/>.
                    self.endElement(name,is_empty)
            elif kind==libxml2.XML_READER_TYPE_END_ELEMENT:
                self.endElement(name)
            elif kind in (libxml2.XML_READER_TYPE_TEXT,
                          libxml2.XML_READER_TYPE_CDATA,
                          libxml2.XML_READER_TYPE_SIGNIFICANT_WHITESPACE):
                self.characters(text)
            elif kind==libxml2.XML_READER_TYPE_ENTITY_REFERENCE:
                self.skippedEntity(name)
            elif kind==libxml2.XML_READER_TYPE_PROCESSING_INSTRUCTION:
                self.processingInstruction(name,text)
            elif kind==libxml2.XML_READER_TYPE_DOCUMENT:
                self.startDocument()
            elif kind==libxml2.XML_READER_TYPE_WHITESPACE:
                self.ignorableWhitespace()
            # Every other node type (none, attribute, entity, comment,
            # document type, document fragment, notation, end entity,
            # XML declaration) is deliberately ignored.  Attributes
            # were already gathered above.
        return

class cStateStack(list):
    """Stack of the names of the XML elements that are currently open.

    The list itself holds the names, innermost element last.
    fPoppedItem remembers the name most recently popped for an empty
    element, so that one redundant end event for it can be tolerated.
    """

    def __init__(self):
        """Create an empty stack."""
        list.__init__(self)
        self.fPoppedItem=None
        return

    def Push(self,aItem):
        """Record aItem as the innermost open element."""
        self.append(aItem)
        return

    def Pop(self,aItem,aEmptyElt=False):
        """Close element aItem and return the name removed from the stack.

        The top of the stack must start with aItem (case-sensitive); a
        prefix match is allowed because pushed names may carry a suffix.
        Set aEmptyElt when the element was empty.  A following
        redundant Pop of the same name then returns None instead of
        being treated as corruption.  Any other mismatch reports the
        problem and the stack on stderr and exits via
        sys.exit('halt').
        """
        if self.IsMostRecently(['%s*'%aItem],aIgnoreCase=False):
            __Result=self.pop()
            if aEmptyElt:
                self.fPoppedItem=__Result
            else:
                self.fPoppedItem=None
            return __Result
        if aItem==self.fPoppedItem:
            # We've inexplicably received an endElement for an empty
            # element.
            self.fPoppedItem=None
            return None
        sys.stderr.write('''
            State stack corrupted.  %s closing but not most recent.  Stack unchanged.
            '''%aItem)
        # Dump the stack to stderr along with the rest of the diagnostic.
        sys.stderr.write('%s\n'%list(self))
        sys.exit('halt')

    def GetDepth(self):
        """Return the number of open elements."""
        return len(self)

    def IsMostRecently(self,aList,aIgnoreCase=True):
        """Tell whether the innermost open elements match aList.

        aList names elements from outer to inner; its last entry is
        compared with the top of the stack.  An entry may contain '*':
        then only the text before the '*' has to agree with the start
        of the stacked name.  Comparison ignores case unless
        aIgnoreCase is false.  Returns False when aList is longer than
        the stack.
        """

        def Test(aDepth):
            # Compare the entries aDepth positions from the end of both
            # sequences.
            if aIgnoreCase:
                __EltList=aList[-aDepth].lower()
                __EltStack=self[-aDepth].lower()
            else:
                __EltList=aList[-aDepth]
                __EltStack=self[-aDepth]
            if __EltList==__EltStack:
                return True
            __Pos=__EltList.find('*')
            if __Pos==NA:
                return False
            else:
                return (__EltList[:__Pos]==__EltStack[:__Pos])

        __Depth=len(aList)
        if __Depth<=self.GetDepth():
            # Work inward from the outermost requested element and stop
            # at the first mismatch.
            __Result=Test(__Depth)
            while (__Depth>1) and __Result:
                __Depth-=1
                __Result=Test(__Depth)
            return __Result
        else:
            return False

class cBook(cDocHandler):
    """Harvest package records from the book's XML while it is read.

    Element events are tracked on a cStateStack.  Depending on which
    elements are innermost, text and attributes are collected into a
    PackageDB.cPackage, which is appended to the package list when the
    enclosing sect1 closes.
    """

    def __init__(self,aReader,aPackageList):
        """Set up an idle harvester.

        aReader is handed to cDocHandler; aPackageList must provide an
        Append method that receives each completed package.
        """
        cDocHandler.__init__(self,aReader)
        self.fPackageList=aPackageList
        self.fStateStack=cStateStack()
        self.fBuffer=NULL
        # Most recent title text.  Initialised here so that elements
        # seen before the first title do not raise AttributeError.
        self.fTitle=NULL
        self.fPackageName=NULL
        self.fPackageVersion=NULL
        self.fURL=NULL
        self.fExternal=NULL
        self.fPackage=None
        self.fCapture=False
        return

    def startElement(self,aName,aAttrs):
        """Handle an opening tag."""
        self.ProcessElement(aName,'init',aAttrs=aAttrs)
        return

    def endElement(self,aName,aIsEmpty=False):
        """Handle a closing tag (or the end of an empty element)."""
        self.ProcessElement(aName,'term',aIsEmpty=aIsEmpty)
        return

    def characters(self,aContent):
        """Accumulate text while capturing is switched on."""
        if self.fCapture:
            self.fBuffer+=aContent
        return

    def ProcessElement(self,aName,aFunction,aAttrs=None,aIsEmpty=False):
        """Dispatch one element event to the matching handler.

        aFunction is 'init' for an opening tag and anything else
        ('term') for a closing one.  aAttrs is a dict of attributes or
        None.  aIsEmpty is passed on to the state stack when the
        element is popped.  The element is pushed before dispatch on
        'init' and popped after dispatch on 'term', so the handlers see
        it on top of the stack in both cases.
        """

        def Buffer(aIsInit):
            # Start capturing text into a fresh buffer on init; stop
            # capturing (keeping the buffer) on term.
            self.fCapture=aIsInit
            if self.fCapture:
                self.fBuffer=NULL
            return

        def IsDependencyTitle():
            # True while the current title starts with "required" or
            # "recommended".
            __Title=self.fTitle.lower()
            return (__Title.startswith('required') or
                    __Title.startswith('recommended'))

        def PushElement(aIsInit,aName):
            if aIsInit:
                if aName in ['bridgehead']:
                    # Stack a bridgehead under its name plus its
                    # renderas value, e.g. 'bridgeheadsect3'.
                    aName+=__SafeAttrs.get('renderas',NULL)
                self.fStateStack.Push(aName)
            return

        def PopElement(aIsInit,aName,aIsEmpty):
            if not aIsInit:
                self.fStateStack.Pop(aName,aIsEmpty)
            return

        def ProcessSect1(aIsInit):
            # A sect1 supplies the package name and version on entry
            # and files the package, if one was started, on exit.
            if aIsInit:
                self.fPackageName=__SafeAttrs.get('id')
                self.fPackageVersion=__SafeAttrs.get('xreflabel')
            else:
                if self.fPackage is not None:
                    self.fPackageList.Append(self.fPackage)
                    self.fPackage=None
            return

        def ProcessTitle(aIsInit):
            # Capture the title text; it also serves as the version
            # when the sect1 carried no xreflabel.
            Buffer(aIsInit)
            if not aIsInit:
                self.fTitle=self.fBuffer
                if self.fPackageVersion in [NULL,None]:
                    self.fPackageVersion=self.fTitle
            return

        def ProcessPackage(aIsInit):
            # A "Package Information" heading starts a new package
            # record unless one is already open.
            if not aIsInit:
                if self.fTitle.lower().startswith('package information'):
                    if self.fPackage is None:
                        self.fPackage=PackageDB.cPackage()
                        self.fPackage.SetNameVersion(self.fPackageName,
                                                     self.fPackageVersion)
                        if OPTS.Verbose:
                            sys.stderr.write(
                                '%s\n'%self.fPackage.GetNameVersion())
            return

        def ProcessURL(aIsInit):
            # A link inside a list item is the source archive under
            # "Package Information" or a patch under "Additional
            # Download...".
            if aIsInit:
                # A ulink without a url attribute yields an empty URL
                # instead of failing on None.strip().
                self.fURL=(__SafeAttrs.get('url') or NULL).strip()
            else:
                if self.fPackage is None or self.fURL in [NULL,None]:
                    pass
                else:
                    __Title=self.fTitle.lower()
                    if __Title.startswith('package information'):
                        self.fPackage.fArchiveList=[self.fURL]
                    elif __Title.startswith('additional download'):
                        self.fPackage.AppendPatch(self.fURL)
            return

        def ProcessConjunction(aIsInit):
            # Under a required/recommended heading, capture the
            # paragraph text and append the marker 'or' to the
            # dependencies when the word "or" occurs in it.
            if IsDependencyTitle():
                Buffer(aIsInit)
                if not aIsInit:
                    __Tokens=[__Tok.lower() for __Tok in self.fBuffer.split()]
                    if 'or' in __Tokens and self.fPackage is not None:
                        self.fPackage.AppendDependency('or')
            return

        def ProcessExternal(aIsInit):
            # An xref under a required/recommended heading names a
            # dependency by its linkend.
            if aIsInit:
                self.fExternal=__SafeAttrs.get('linkend')
            else:
                if IsDependencyTitle():
                    if self.fPackage is not None:
                        self.fPackage.AppendDependency(self.fExternal)
            return

        def ProcessForeign(aIsInit):
            # A ulink in a paragraph under a required/recommended
            # heading names a dependency by its link text.
            Buffer(aIsInit)
            if not aIsInit:
                if IsDependencyTitle():
                    if self.fPackage is not None:
                        self.fPackage.AppendDependency(self.fBuffer)
            return

        def ProcessCode(aIsInit):
            # User input shown in a screen becomes a command, preceded
            # by a comment line holding the current title.
            Buffer(aIsInit)
            if not aIsInit:
                if self.fPackage is not None:
                    self.fPackage.AppendCommand(
                        '# %s\n%s'%(self.fTitle,self.fBuffer))
            return

        __IsInit=aFunction in ['init']
        # Attribute names are compared in lower case.
        __SafeAttrs={}
        if aAttrs is not None:
            for (__Key,__Value) in aAttrs.items():
                __SafeAttrs[__Key.lower()]=__Value
        PushElement(__IsInit,aName)
        if self.fStateStack.IsMostRecently(['sect1']):
            ProcessSect1(__IsInit)
        elif (self.fStateStack.IsMostRecently(['title']) or
              self.fStateStack.IsMostRecently(['bridgehead*'])):
            ProcessTitle(__IsInit)
            if (self.fStateStack.IsMostRecently(['sect3','title']) or
                self.fStateStack.IsMostRecently(['bridgeheadsect3'])):
                ProcessPackage(__IsInit)
        elif self.fStateStack.IsMostRecently(
                ['itemizedlist','listitem','para','ulink']):
            ProcessURL(__IsInit)
        elif self.fStateStack.IsMostRecently(['para']):
            ProcessConjunction(__IsInit)
        elif self.fStateStack.IsMostRecently(['para','xref']):
            ProcessExternal(__IsInit)
        elif self.fStateStack.IsMostRecently(['para','ulink']):
            ProcessForeign(__IsInit)
        elif self.fStateStack.IsMostRecently(['screen','userinput']):
            ProcessCode(__IsInit)
        PopElement(__IsInit,aName,aIsEmpty)
        return

# Main line: parse the command line, read the book, and store the
# harvested package database.

__Parser=optparse.OptionParser()
__DefaultIndexDoc=os.path.expanduser('~/BLFS/BOOK/index.xml')
__Parser.add_option('-I','--IndexDoc',
                    help=('Root of the XML document.  Default is %s.'
                          % (__DefaultIndexDoc)),
                    default=__DefaultIndexDoc)
__Parser.add_option('-P','--PackageDB',
                    help=('Output file name to receive package database.  '
                          'Default is > stdout.'))
__Parser.add_option('-V','--Verbose',
                    action='store_true',
                    help='List package names during processing.',
                    default=False)
(OPTS,__Args)=__Parser.parse_args()
if len(__Args)>ZERO:
    __Parser.error('Arguments are prohibited.')
if not os.path.exists(OPTS.IndexDoc):
    __Parser.error(OPTS.IndexDoc+' not found.')
# A missing, empty, or literal '> stdout' file name is normalised to
# None before being handed to PackageDB.Store.
if OPTS.PackageDB in [None,NULL,'> stdout']:
    OPTS.PackageDB=None

# Expand entities.  I know XML_PARSE_NOENT doesn't look right.
__Flags=(
    libxml2.XML_PARSE_XINCLUDE| # Expand xincludes.
    libxml2.XML_PARSE_NOENT|    # Expand entities (see note above).
    libxml2.XML_PARSE_NOBLANKS| # Suppress whitespace.
    ZERO)
__Book=cBook(libxml2.readerForFile(OPTS.IndexDoc,
                                   'ascii',
                                   __Flags,
                                   ),
             PackageDB.PackageList,
             )
__Book.ReadLoop()
PackageDB.Store(OPTS.PackageDB)

# Fin
-- 
http://linuxfromscratch.org/mailman/listinfo/blfs-support
FAQ: http://www.linuxfromscratch.org/blfs/faq.html
Unsubscribe: See the above information page

Reply via email to