Reviewed-by: Yonghong Zhu <yonghong....@intel.com>

Best Regards,

Zhu Yonghong

-----Original Message-----
From: Justen, Jordan L 
Sent: Saturday, December 05, 2015 4:13 AM
To: edk2-devel@lists.01.org
Cc: Justen, Jordan L; Zhu, Yonghong; Gao, Liming; Kinney, Michael D
Subject: [PATCH v2 01/20] BaseTools/Scripts: Add ConvertUni.py script

This script uses python codecs to convert .uni string files between the utf-16 
and utf-8 formats.

The advantages of utf-8 data:
 * Generally smaller files
 * More commonly supported by editors
 * Not treated as binary data in patch files

The script was tested on MdePkg with both python 2.7 and python 3.4.
It was able to convert all MdePkg .uni files between utf-8 and utf-16 multiple 
times always producing the same files for each format.

v2:
 * Rename ConvertUtf16ToUtf8.py to ConvertUni.py
 * Also support utf-8 to utf-16 conversion (with --utf-16)

Cc: Yonghong Zhu <yonghong....@intel.com>
Cc: Liming Gao <liming....@intel.com>
Cc: Michael D Kinney <michael.d.kin...@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com>
Reviewed-by: Jaben Carsey <jaben.car...@intel.com>
---
 BaseTools/Scripts/ConvertUni.py | 137 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100755 BaseTools/Scripts/ConvertUni.py

diff --git a/BaseTools/Scripts/ConvertUni.py b/BaseTools/Scripts/ConvertUni.py
new file mode 100755
index 0000000..2af55df
--- /dev/null
+++ b/BaseTools/Scripts/ConvertUni.py
@@ -0,0 +1,137 @@
+## @file
+#  Convert .uni string files between the utf-16 and utf-8 formats
+#
+#  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
+#
+#  This program and the accompanying materials are licensed and made
+#  available under the terms and conditions of the BSD License which
+#  accompanies this distribution. The full text of the license may be
+#  found at http://opensource.org/licenses/bsd-license.php
+#
+#  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS"
+#  BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER
+#  EXPRESS OR IMPLIED.
+#
+
+from __future__ import print_function
+
+VersionNumber = '0.1'
+__copyright__ = "Copyright (c) 2015, Intel Corporation  All rights reserved."
+
+import argparse
+import codecs
+import os
+import sys
+
+try:
+    from io import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+class ConvertOneArg:
+    """Converts the .uni files named by one command line argument.
+
+       The argument may be a single file, which is converted whatever
+       its extension, or a directory, which is walked recursively and
+       has every file ending in '.uni' converted.  Files are rewritten
+       in place.
+
+       Attributes:
+         utf8   - True to convert utf-16 to utf-8, False to convert
+                  utf-8 to utf-16
+         source - the file or directory path that was given
+         ok     - False if the path does not exist or a file could not
+                  be converted, True otherwise
+    """
+
+    def __init__(self, utf8, source):
+        self.utf8 = utf8
+        self.source = source
+
+        if not os.path.exists(source):
+            print("%s: not found" % source, file=sys.stderr)
+            self.ok = False
+        elif os.path.isdir(source):
+            self.ok = self._convert_directory(source)
+        else:
+            self.ok = self.convert_one_file(source)
+
+    def _convert_directory(self, directory):
+        """Converts every '.uni' file found below directory.
+
+           Stops at the first file that fails to convert.  Returns True
+           if every conversion succeeded, False otherwise.
+        """
+        for (root, dirs, files) in os.walk(directory):
+            for filename in files:
+                if not filename.endswith('.uni'):
+                    continue
+                if not self.convert_one_file(os.path.join(root, filename)):
+                    return False
+        return True
+
+    def convert_one_file(self, source):
+        """Converts one file in place to the target encoding.
+
+           Returns True if the file was converted or was already in the
+           target encoding.  Returns False, leaving the file untouched,
+           if its content cannot be decoded or re-encoded.
+        """
+        if self.utf8:
+            new_enc, old_enc = 'utf-8', 'utf-16'
+            decoder = 'utf-16'
+        else:
+            new_enc, old_enc = 'utf-16', 'utf-8'
+            #
+            # 'utf-8-sig' decodes plain utf-8 and also strips an optional
+            # utf-8 BOM, so it does not end up as a stray U+FEFF
+            # character following the utf-16 BOM in the output.
+            #
+            decoder = 'utf-8-sig'
+
+        #
+        # Read file
+        #
+        with open(source, mode='rb') as f:
+            file_content = f.read()
+
+        #
+        # A UTF-16 Byte Order Mark at the beginning of the file is the
+        # only signal used to tell the two encodings apart.
+        #
+        bom = file_content.startswith((codecs.BOM_UTF16_BE,
+                                       codecs.BOM_UTF16_LE))
+        if bom != self.utf8:
+            print("%s: already %s" % (source, new_enc))
+            return True
+
+        #
+        # Decode the old data and encode the new data strictly.  The file
+        # is overwritten in place, so silently dropping characters that
+        # fail to convert would lose data without any indication.
+        #
+        try:
+            new_content = file_content.decode(decoder).encode(new_enc)
+        except UnicodeError as e:
+            print("%s: cannot convert from %s: %s" % (source, old_enc, e),
+                  file=sys.stderr)
+            return False
+
+        #
+        # Write converted data back to file
+        #
+        with open(source, mode='wb') as f:
+            f.write(new_content)
+
+        print(source + ": converted, size", len(file_content), '=>',
+              len(new_content))
+        return True
+
+
+class ConvertUniApp:
+    """Command line application converting .uni files between utf-16
+       and utf-8.
+
+       Constructing the object parses the command line, processes every
+       source argument and stores the process exit status in retval:
+       0 when every argument was handled successfully, -1 otherwise.
+    """
+
+    def __init__(self):
+        self.parse_options()
+
+        self.ok = True
+        for source in self.args.source:
+            self.process_one_arg(source)
+
+        self.retval = 0 if self.ok else -1
+
+    def process_one_arg(self, arg):
+        """Converts one source argument and folds its result into ok."""
+        converted = ConvertOneArg(self.utf8, arg)
+        self.ok &= converted.ok
+
+    def parse_options(self):
+        """Parses the command line into args and sets the utf8 flag."""
+        cmdline = argparse.ArgumentParser(description=__copyright__)
+        cmdline.add_argument(
+            '--version',
+            action='version',
+            version='%(prog)s ' + VersionNumber)
+        cmdline.add_argument(
+            'source',
+            nargs='+',
+            help='[uni file | directory]')
+
+        # The two target encodings cannot be requested together.
+        target = cmdline.add_mutually_exclusive_group()
+        target.add_argument(
+            "--utf-8",
+            action="store_true",
+            help="Convert from utf-16 to utf-8 [default]")
+        target.add_argument(
+            "--utf-16",
+            action="store_true",
+            help="Convert from utf-8 to utf-16")
+
+        self.args = cmdline.parse_args()
+        # utf-8 is the default target unless --utf-16 was given.
+        self.utf8 = not self.args.utf_16
+
+if __name__ == "__main__":
+    # Run the conversion and hand its status back to the shell.
+    app = ConvertUniApp()
+    sys.exit(app.retval)
--
2.6.2

_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel

Reply via email to