On 2015-05-05 00:53:03, Gao, Liming wrote: > Jordan: > BaseTools UPT (Source\Python\UPT\Library\UniClassObject.py) also > parses UNI files. Could you update it together with this change? Or do you expect the UPT > tool owner to follow up?
Doesn't UPT attempt to use any common code? This seems like a waste of effort, and a recipe for inconsistency. For an example of wasted effort, see this discussion, and the potential duplication of code here. For an example of inconsistency, try diffing the two versions of UniClassObject.py. -Jordan > -----Original Message----- > From: Jordan Justen [mailto:jordan.l.jus...@intel.com] > Sent: Tuesday, May 05, 2015 3:09 PM > To: edk2-devel@lists.sourceforge.net > Subject: [edk2] [PATCH v2 1/7] BaseTools: Support UTF-8 string data in .uni > files > > Since UEFI only supports UTF-16LE strings internally, this simply allows for > another Unicode encoding of the source file. > > The strings are still converted to UTF-16LE data for use in EDK II source > code. > > When .uni files contain UTF-16 data, it is impossible for Unicode code points > to be larger than 0xFFFF. To support .uni files that contain > UTF-8 data, we also need to deal with the possibility that the > UTF-8 file contains Unicode code points larger than 16 bits. Since UEFI only > supports 16-bit string data, we make UniClassObject generate an error if a > larger code point is seen in a UTF-8 string value. > > We only check string value data, so it is possible to use larger code points > in comments. > > v2: > * Drop .utf8 extension.
Use .uni file for UTF-8 data (mdkinney) > * Merge in 'BaseTools/UniClassObject: Verify string data is 16-bit' > commit > > Cc: Yingke D Liu <yingke.d....@intel.com> > Cc: Michael D Kinney <michael.d.kin...@intel.com> > Contributed-under: TianoCore Contribution Agreement 1.0 > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > BaseTools/Source/Python/AutoGen/UniClassObject.py | 38 > +++++++++++++++++++++-- > 1 file changed, 36 insertions(+), 2 deletions(-) > > diff --git a/BaseTools/Source/Python/AutoGen/UniClassObject.py > b/BaseTools/Source/Python/AutoGen/UniClassObject.py > index aa54f4f..41448ab 100644 > --- a/BaseTools/Source/Python/AutoGen/UniClassObject.py > +++ b/BaseTools/Source/Python/AutoGen/UniClassObject.py > @@ -209,7 +209,7 @@ class UniFileClassObject(object): > Lang = distutils.util.split_quoted((Line.split(u"//")[0])) > if len(Lang) != 3: > try: > - FileIn = codecs.open(LongFilePath(File.Path), mode='rb', > encoding='utf-16').read() > + FileIn = self.OpenUniFile(LongFilePath(File.Path)) > except UnicodeError, X: > EdkLogger.error("build", FILE_READ_FAILURE, "File read > failure: %s" % str(X), ExtraData=File); > except: > @@ -253,6 +253,38 @@ class UniFileClassObject(object): > self.OrderedStringDict[LangName][Item.StringName] = > len(self.OrderedStringList[LangName]) - 1 > return True > > + def OpenUniFile(self, FileName): > + Encoding = 'utf-8' > + UniFile = open(FileName, 'rb') > + > + # > + # Seek to end of file to determine its size > + # > + UniFile.seek(0, 2) > + FileSize = UniFile.tell() > + > + if FileSize >= 2: > + # > + # Seek to start of the file to read the UTF-16 BOM > + # > + UniFile.seek(0, 0) > + Bom = UniFile.read(2) > + UniFile.seek(0, 0) > + > + if Bom == '\xff\xfe': > + Encoding = 'utf-16' > + > + Info = codecs.lookup(Encoding) > + return codecs.StreamReaderWriter(UniFile, Info.streamreader, > + Info.streamwriter) > + > + def Verify16bitCodePoints(self, String): > + for cp in String: > + if ord(cp) > 0xffff: > + 
tmpl = 'The string {} defined in file {} ' + \ > + 'contains a character with a code point above 0xFFFF.' > + error = tmpl.format(repr(String), self.File) > + EdkLogger.error('Unicode File Parser', FORMAT_INVALID, > + error) > + > # > # Get String name and value > # > @@ -274,6 +306,7 @@ class UniFileClassObject(object): > Language = LanguageList[IndexI].split()[0] > Value = > LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : > LanguageList[IndexI].rfind(u'\"')] #.replace(u'\r\n', u'') > Language = GetLanguageCode(Language, self.IsCompatibleMode, > self.File) > + self.Verify16bitCodePoints(Value) > self.AddStringToList(Name, Language, Value) > > # > @@ -305,7 +338,7 @@ class UniFileClassObject(object): > EdkLogger.error("Unicode File Parser", FILE_NOT_FOUND, > ExtraData=File.Path) > > try: > - FileIn = codecs.open(LongFilePath(File.Path), mode='rb', > encoding='utf-16') > + FileIn = self.OpenUniFile(LongFilePath(File.Path)) > except UnicodeError, X: > EdkLogger.error("build", FILE_READ_FAILURE, "File read failure: > %s" % str(X), ExtraData=File.Path); > except: > @@ -426,6 +459,7 @@ class UniFileClassObject(object): > MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE) > if MatchString == None or MatchString.end(0) != > len(Name): > EdkLogger.error('Unicode File Parser', > FORMAT_INVALID, 'The string token name %s defined in UNI file %s contains the > invalid lower case character.' %(Name, self.File)) > + self.Verify16bitCodePoints(Value) > self.AddStringToList(Name, Language, Value) > continue > > -- > 2.1.4 > > > ------------------------------------------------------------------------------ > One dashboard for servers and applications across Physical-Virtual-Cloud > Widest out-of-the-box monitoring support with 50+ applications > Performance metrics, stats and reports that give you Actionable Insights > Deep dive visibility with transaction tracing using APM Insight. 
> http://ad.doubleclick.net/ddm/clk/290420510;117567292;y > _______________________________________________ > edk2-devel mailing list > edk2-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/edk2-devel > > ------------------------------------------------------------------------------ > One dashboard for servers and applications across Physical-Virtual-Cloud > Widest out-of-the-box monitoring support with 50+ applications > Performance metrics, stats and reports that give you Actionable Insights > Deep dive visibility with transaction tracing using APM Insight. > http://ad.doubleclick.net/ddm/clk/290420510;117567292;y > _______________________________________________ > edk2-devel mailing list > edk2-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/edk2-devel ------------------------------------------------------------------------------ One dashboard for servers and applications across Physical-Virtual-Cloud Widest out-of-the-box monitoring support with 50+ applications Performance metrics, stats and reports that give you Actionable Insights Deep dive visibility with transaction tracing using APM Insight. http://ad.doubleclick.net/ddm/clk/290420510;117567292;y _______________________________________________ edk2-devel mailing list edk2-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/edk2-devel