--- repomdorig/dumpMetadata.py	2007-01-03 22:40:52.000000000 -0500
+++ repomd/dumpMetadata.py	2007-01-04 15:32:14.000000000 -0500
@@ -124,7 +124,7 @@
         raise MDError, 'Error opening file for checksum: %s' % file
 
 
-def utf8String(string):
+def utf8String(string, encoding=None):
     """hands back a unicoded string"""
     if string is None:
         return ''
@@ -134,7 +134,10 @@
         x = unicode(string, 'ascii')
         return string
     except UnicodeError:
-        encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
+        if encoding:
+            encodings = [encoding]
+        else:
+            encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2']
         for enc in encodings:
             try:
                 x = unicode(string, enc)
@@ -229,6 +232,7 @@
            self.noepoch = ""
         else:
            self.noepoch = 0
+        self.encoding = options.get('encoding')
         self.relativepath = filename
         fd = returnFD(os.path.join(basedir, filename))
         self.hdr = returnHdr(ts, fd)
@@ -416,15 +420,20 @@
                 u.append(x)
         return u
 
+    def _utf8AsNeeded(self, data):  # run plain str values through utf8String; return anything else unchanged
+        if isinstance(data, types.StringType):  # Python 2 byte str only; unicode objects are skipped
+            data = utf8String(data, self.encoding)  # a truthy self.encoding replaces the fallback codec list
+        return data
+
     def tagByName(self, tag):
         data = self.hdr[tag]
         if type(data) is types.ListType:
             if len(data) > 0:
-                return data[0]
+                return self._utf8AsNeeded(data[0])
             else:
                 return ''
         else:
-            return data
+            return self._utf8AsNeeded(data)
     
     def listTagByName(self, tag):
         """take a tag that should be a list and make sure it is one"""
@@ -434,9 +443,9 @@
             return lst
             
         if type(data) is types.ListType:
-            lst.extend(data)
+            lst.extend(map(self._utf8AsNeeded, data))
         else:
-            lst.append(data)
+            lst.append(self._utf8AsNeeded(data))
         return lst
 
         
@@ -504,7 +513,7 @@
             for glob in self.dirrc:
                 if glob.match(item):
                     returns[item] = 1
-        return returns.keys()
+        return returns
 
     
     def depsList(self):
@@ -517,6 +526,7 @@
         prereq = self._checkPreReq(tmpflags)
         ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION])
         if names is not None:
+            names = map(self._utf8AsNeeded, names)
             lst = zip(names, flags, ver, prereq)
         return self._uniq(lst)
         
@@ -527,6 +537,7 @@
         flags = self._correctFlags(tmpflags)
         ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION])
         if names is not None:
+            names = map(self._utf8AsNeeded, names)
             lst = zip(names, flags, ver)
         return self._uniq(lst)
 
@@ -537,6 +548,7 @@
         flags = self._correctFlags(tmpflags)
         ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION])
         if names is not None:
+            names = map(self._utf8AsNeeded, names)
             lst = zip(names, flags, ver)
         return self._uniq(lst)
 
@@ -547,6 +559,7 @@
         flags = self._correctFlags(tmpflags)
         ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION])
         if names is not None:
+            names = map(self._utf8AsNeeded, names)
             lst = zip(names, flags, ver)
         return self._uniq(lst)
         
@@ -608,7 +621,6 @@
     csum.newProp('pkgid', 'YES')
     for tag in ['summary', 'description', 'packager', 'url']:
-        value = rpmObj.tagByName(tag)
-        value = utf8String(value)
+        value = rpmObj.tagByName(tag) or ''  # keep the None -> '' guard utf8String gave; re.sub rejects None
         value = re.sub("\n$", '', value)
         entry = pkgNode.newChild(None, tag, None)
         entry.addContent(value)
@@ -624,14 +636,16 @@
     if rpmObj.localurl is not None:
         location.newProp('xml:base', rpmObj.localurl)
     location.newProp('href', rpmObj.relativepath)
+    if rpmObj.encoding is not None:
+        encoding = pkgNode.newChild(None, 'encoding', None)
+        encoding.newProp('charset', rpmObj.encoding)
     format = pkgNode.newChild(ns, 'format', None)
     for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']:
-        value = rpmObj.tagByName(tag)
-        value = utf8String(value)
+        value = rpmObj.tagByName(tag) or ''  # keep the None -> '' guard utf8String gave; re.sub rejects None
         value = re.sub("\n$", '', value)
         entry = format.newChild(formatns, tag, None)
         entry.addContent(value)
-        
+
     hr = format.newChild(formatns, 'header-range', None)
     hr.newProp('start', str(rpmObj.rangestart))
     hr.newProp('end', str(rpmObj.rangeend))
@@ -683,16 +697,13 @@
         
     for file in rpmObj.usefulFiles():
         files = format.newChild(None, 'file', None)
-        file = utf8String(file)
         files.addContent(file)
     for directory in rpmObj.usefulDirs():
         files = format.newChild(None, 'file', None)
-        directory = utf8String(directory)
         files.addContent(directory)
         files.newProp('type', 'dir')
     for directory in rpmObj.usefulGhosts():
         files = format.newChild(None, 'file', None)
-        directory = utf8String(directory)
         files.addContent(directory)
         files.newProp('type', 'ghost')
 
@@ -710,16 +721,13 @@
     version.newProp('rel', str(rpmObj.tagByName('release')))
     for file in rpmObj.filenames:
         files = pkg.newChild(None, 'file', None)
-        file = utf8String(file)
         files.addContent(file)
     for directory in rpmObj.dirnames:
         files = pkg.newChild(None, 'file', None)
-        directory = utf8String(directory)
         files.addContent(directory)
         files.newProp('type', 'dir')
     for ghost in rpmObj.ghostnames:
         files = pkg.newChild(None, 'file', None)
-        ghost = utf8String(ghost)
         files.addContent(ghost)
         files.newProp('type', 'ghost')
     return pkg
@@ -737,8 +745,8 @@
     clogs = rpmObj.changelogLists()
     for (name, time, text) in clogs:
         clog = pkg.newChild(None, 'changelog', None)
-        clog.addContent(utf8String(text))
-        clog.newProp('author', utf8String(name))
+        clog.addContent(text)
+        clog.newProp('author', name)
         clog.newProp('date', str(time))
     return pkg
     
