commit:     51f3cf18de49ca9dffc34053da53e62930be9fab
Author:     Paul Varner <fuzzyray <AT> gentoo <DOT> org>
AuthorDate: Thu Jun 30 23:09:52 2016 +0000
Commit:     Paul Varner <fuzzyray <AT> gentoo <DOT> org>
CommitDate: Thu Jun 30 23:30:54 2016 +0000
URL:        https://gitweb.gentoo.org/proj/gentoolkit.git/commit/?id=51f3cf18

Change open function to use UTF-8 encoding for content.

Uses io.open for Python 2 and the built-in open for Python 3.
All data read from files is treated as Unicode, which should fix
most UnicodeDecodeErrors.

 pym/gentoolkit/deprecated/helpers.py      |  6 +++++-
 pym/gentoolkit/eclean/exclude.py          |  4 +++-
 pym/gentoolkit/enalyze/rebuild.py         |  5 ++++-
 pym/gentoolkit/equery/uses.py             | 11 +++++++----
 pym/gentoolkit/equery/which.py            |  5 ++++-
 pym/gentoolkit/eshowkw/keywords_header.py |  9 +++++++--
 pym/gentoolkit/glsa/__init__.py           |  5 ++++-
 pym/gentoolkit/helpers.py                 |  3 ++-
 pym/gentoolkit/revdep_rebuild/analyse.py  |  6 +++++-
 pym/gentoolkit/revdep_rebuild/cache.py    | 13 +++++++++----
 pym/gentoolkit/revdep_rebuild/collect.py  |  8 ++++++--
 pym/gentoolkit/revdep_rebuild/settings.py |  4 +++-
 pym/gentoolkit/test/eclean/creator.py     |  6 +++++-
 13 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/pym/gentoolkit/deprecated/helpers.py 
b/pym/gentoolkit/deprecated/helpers.py
index 81fa45c..bb0fb7e 100644
--- a/pym/gentoolkit/deprecated/helpers.py
+++ b/pym/gentoolkit/deprecated/helpers.py
@@ -11,6 +11,10 @@ from __future__ import print_function
 
 import warnings
 
+import sys
+if sys.hexversion < 0x3000000:
+       from io import open
+
 import portage
 from portage import _encodings, _unicode_decode, _unicode_encode
 from gentoolkit import *
@@ -101,7 +105,7 @@ def find_world_packages(prefilter=None):
        """Returns a tuple of lists, first list is resolved world packages,
        seond is unresolved package names."""
        f = open(_unicode_encode(portage.root+portage.WORLD_FILE,
-               encoding=_encodings['fs']))
+               encoding=_encodings['fs']), encoding=_encodings['content'])
        pkglist = f.readlines()
        resolved = []
        unresolved = []

diff --git a/pym/gentoolkit/eclean/exclude.py b/pym/gentoolkit/eclean/exclude.py
index d19c1d1..513346d 100644
--- a/pym/gentoolkit/eclean/exclude.py
+++ b/pym/gentoolkit/eclean/exclude.py
@@ -9,6 +9,8 @@ from __future__ import print_function
 
 import os
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 import re
 import portage
 from portage import _encodings, _unicode_decode, _unicode_encode
@@ -83,7 +85,7 @@ def parseExcludeFile(filepath, output):
        output("Parsing Exclude file: " + filepath)
        try:
                file_ = open(_unicode_encode(filepath, 
-                       encoding=_encodings['fs']), mode="r")
+                       encoding=_encodings['fs']), mode="r", 
encoding=_encodings['content'])
        except IOError:
                raise ParseExcludeFileException("Could not open exclusion file: 
" +
                        filepath)

diff --git a/pym/gentoolkit/enalyze/rebuild.py 
b/pym/gentoolkit/enalyze/rebuild.py
index 11feb31..53fded4 100644
--- a/pym/gentoolkit/enalyze/rebuild.py
+++ b/pym/gentoolkit/enalyze/rebuild.py
@@ -15,6 +15,8 @@ from __future__ import print_function
 
 import os
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 
 import gentoolkit
 from gentoolkit.module_base import ModuleBase
@@ -352,7 +354,8 @@ class Rebuild(ModuleBase):
                """
                if  not self.options["quiet"]:
                        print('   - Saving file: %s' %filepath)
-               with open(_unicode_encode(filepath, encoding=_encodings['fs']), 
mode="w") as output:
+               with open(_unicode_encode(filepath, encoding=_encodings['fs']), 
mode="w",
+                               encoding=_encodings['content']) as output:
                        output.write('\n'.join(data))
                print("   - Done")
 

diff --git a/pym/gentoolkit/equery/uses.py b/pym/gentoolkit/equery/uses.py
index 79f1118..a8f13dc 100644
--- a/pym/gentoolkit/equery/uses.py
+++ b/pym/gentoolkit/equery/uses.py
@@ -16,6 +16,9 @@ __docformat__ = 'epytext'
 
 import os
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
+
 from functools import partial
 from getopt import gnu_getopt, GetoptError
 from glob import glob
@@ -136,9 +139,9 @@ def get_global_useflags():
        # Get global USE flag descriptions
        try:
                path = os.path.join(settings["PORTDIR"], 'profiles', 'use.desc')
-               with open(_unicode_encode(path, encoding=_encodings['fs'])) as 
open_file:
+               with open(_unicode_encode(path, encoding=_encodings['fs']),
+                               encoding=_encodings['content']) as open_file:
                        for line in open_file:
-                               line = _unicode_decode(line)
                                if line.startswith('#'):
                                        continue
                                # Ex. of fields: ['syslog', 'Enables support 
for syslog\n']
@@ -157,9 +160,9 @@ def get_global_useflags():
        for path in glob(os.path.join(settings["PORTDIR"],
                'profiles', 'desc', '*.desc')):
                try:
-                       with open(_unicode_encode(path, 
encoding=_encodings['fs'])) as open_file:
+                       with open(_unicode_encode(path, 
encoding=_encodings['fs']),
+                                       encoding=_encodings['content']) as 
open_file:
                                for line in open_file:
-                                       line = _unicode_decode(line)
                                        if line.startswith('#'):
                                                continue
                                        fields = [field.strip() for field in 
line.split(" - ", 1)]

diff --git a/pym/gentoolkit/equery/which.py b/pym/gentoolkit/equery/which.py
index 0d30a8d..137e52a 100644
--- a/pym/gentoolkit/equery/which.py
+++ b/pym/gentoolkit/equery/which.py
@@ -18,6 +18,8 @@ __docformat__ = 'epytext'
 
 import os
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 from getopt import gnu_getopt, GetoptError
 
 
@@ -62,7 +64,8 @@ def print_help(with_description=True):
 
 def print_ebuild(ebuild_path):
        """Output the ebuild to std_out"""
-       with open(_unicode_encode(ebuild_path, encoding=_encodings['fs'])) as f:
+       with open(_unicode_encode(ebuild_path, encoding=_encodings['fs']),
+                       encoding=_encodings['content']) as f:
                lines = f.readlines()
                print("\n\n")
                print("".join(lines))

diff --git a/pym/gentoolkit/eshowkw/keywords_header.py 
b/pym/gentoolkit/eshowkw/keywords_header.py
index bc5c0c5..1b9af0d 100644
--- a/pym/gentoolkit/eshowkw/keywords_header.py
+++ b/pym/gentoolkit/eshowkw/keywords_header.py
@@ -6,6 +6,9 @@ __all__ = ['keywords_header']
 
 import portage
 import os
+import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 from portage import _encodings, _unicode_decode, _unicode_encode
 from portage import settings as ports
 from portage.output import colorize
@@ -31,7 +34,8 @@ def load_profile_data(portdir=None, repo='gentoo'):
 
        try:
                arch_list = os.path.join(portdir, 'profiles', 'arch.list')
-               with open(_unicode_encode(arch_list, 
encoding=_encodings['fs'])) as f:
+               with open(_unicode_encode(arch_list, encoding=_encodings['fs']),
+                               encoding=_encodings['content']) as f:
                        for line in f:
                                line = line.split('#', 1)[0].strip()
                                if line:
@@ -47,7 +51,8 @@ def load_profile_data(portdir=None, repo='gentoo'):
                        None: 3,
                }
                profiles_list = os.path.join(portdir, 'profiles', 
'profiles.desc')
-               with open(_unicode_encode(profiles_list, 
encoding=_encodings['fs'])) as f:
+               with open(_unicode_encode(profiles_list, 
encoding=_encodings['fs']),
+                               encoding=_encodings['content']) as f:
                        for line in f:
                                line = line.split('#', 1)[0].split()
                                if line:

diff --git a/pym/gentoolkit/glsa/__init__.py b/pym/gentoolkit/glsa/__init__.py
index 30a5ae2..ba1eed7 100644
--- a/pym/gentoolkit/glsa/__init__.py
+++ b/pym/gentoolkit/glsa/__init__.py
@@ -17,6 +17,8 @@ __author__ = "Marius Mauch <gen...@gentoo.org>"
 
 
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 import os
 try:
     from urllib import urlopen
@@ -705,7 +707,8 @@ class Glsa:
                """
                if not self.isInjected():
                        checkfile = 
open(_unicode_encode(self.config["CHECKFILE"],
-                               encoding=_encodings['fs']), mode="a+")
+                               encoding=_encodings['fs']), mode="a+",
+                               encoding=_encodings['content'])
                        checkfile.write(self.nr+"\n")
                        checkfile.close()
                return None

diff --git a/pym/gentoolkit/helpers.py b/pym/gentoolkit/helpers.py
index b7314b9..0d985b6 100644
--- a/pym/gentoolkit/helpers.py
+++ b/pym/gentoolkit/helpers.py
@@ -195,7 +195,8 @@ class ChangeLog(object):
                result = []
                partial_entries = []
                with open(_unicode_encode(self.changelog_path, 
-                       encoding=_encodings['fs'], errors="replace")) as log:
+                       encoding=_encodings['fs'], errors="replace"),
+                       encoding=_encodings['content']) as log:
                        for line in log:
                                if line.startswith('#'):
                                        continue

diff --git a/pym/gentoolkit/revdep_rebuild/analyse.py 
b/pym/gentoolkit/revdep_rebuild/analyse.py
index 59240b4..9f018b5 100644
--- a/pym/gentoolkit/revdep_rebuild/analyse.py
+++ b/pym/gentoolkit/revdep_rebuild/analyse.py
@@ -7,6 +7,9 @@ from __future__ import print_function
 import os
 import re
 import time
+import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 
 from portage import _encodings, _unicode_decode, _unicode_encode
 from portage.output import bold, blue, yellow, green
@@ -83,7 +86,8 @@ def extract_dependencies_from_la(la, libraries, to_check, 
logger):
                if not os.path.exists(_file):
                        continue
 
-               for line in open(_unicode_encode(_file, 
encoding=_encodings['fs']), mode='r').readlines():
+               for line in open(_unicode_encode(_file, 
encoding=_encodings['fs']), mode='r',
+                       encoding=_encodings['content']).readlines():
                        line = line.strip()
                        if line.startswith('dependency_libs='):
                                match = re.match("dependency_libs='([^']+)'", 
line)

diff --git a/pym/gentoolkit/revdep_rebuild/cache.py 
b/pym/gentoolkit/revdep_rebuild/cache.py
index 6d1a1a3..7359d05 100644
--- a/pym/gentoolkit/revdep_rebuild/cache.py
+++ b/pym/gentoolkit/revdep_rebuild/cache.py
@@ -7,6 +7,9 @@ from __future__ import print_function
 
 import os
 import time
+import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 
 from portage import _encodings, _unicode_decode, _unicode_encode
 from portage.output import red
@@ -31,7 +34,7 @@ def read_cache(temp_path=DEFAULTS['DEFAULT_TMP_DIR']):
        try:
                for key,val in ret.items():
                        _file = open(_unicode_encode(os.path.join(temp_path, 
key),
-                               encoding=_encodings['fs']))
+                               encoding=_encodings['fs']), 
encoding=_encodings['content'])
                        for line in _file.readlines():
                                val.add(line.strip())
                        #libraries.remove('\n')
@@ -55,13 +58,14 @@ def save_cache(logger, to_save={}, 
temp_path=DEFAULTS['DEFAULT_TMP_DIR']):
 
        try:
                _file = open(_unicode_encode(os.path.join(temp_path, 
'timestamp'),
-                       encoding=_encodings['fs']), mode='w')
+                       encoding=_encodings['fs']), mode='w', 
encoding=_encodings['content'])
                _file.write(str(int(time.time())))
                _file.close()
 
                for key,val in to_save.items():
                        _file = open(_unicode_encode(os.path.join(temp_path, 
key),
-                               encoding=_encodings['fs']), mode='w')
+                               encoding=_encodings['fs']), mode='w',
+                               encoding=_encodings['content'])
                        for line in val:
                                _file.write(line + '\n')
                        _file.close()
@@ -89,7 +93,8 @@ def check_temp_files(temp_path=DEFAULTS['DEFAULT_TMP_DIR'], 
max_delay=3600,
                return False
 
        try:
-               _file = open(_unicode_encode(timestamp_path, 
encoding=_encodings['fs']))
+               _file = open(_unicode_encode(timestamp_path, 
encoding=_encodings['fs']),
+                       encoding=_encodings['content'])
                timestamp = int(_file.readline())
                _file .close()
        except Exception as ex:

diff --git a/pym/gentoolkit/revdep_rebuild/collect.py 
b/pym/gentoolkit/revdep_rebuild/collect.py
index 1f34f1c..ab3ef97 100644
--- a/pym/gentoolkit/revdep_rebuild/collect.py
+++ b/pym/gentoolkit/revdep_rebuild/collect.py
@@ -9,6 +9,8 @@ import os
 import glob
 import stat
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 
 import portage
 from portage import _encodings, _unicode_decode, _unicode_encode
@@ -35,7 +37,8 @@ def parse_conf(conf_file, visited=None, logger=None):
 
        for conf in conf_file:
                try:
-                       with open(_unicode_encode(conf, 
encoding=_encodings['fs'])) as _file:
+                       with open(_unicode_encode(conf, 
encoding=_encodings['fs']),
+                                       encoding=_encodings['content']) as 
_file:
                                for line in _file.readlines():
                                        line = line.strip()
                                        if line.startswith('#'):
@@ -77,7 +80,8 @@ def prepare_search_dirs(logger, settings):
        #try:
        with open(_unicode_encode(os.path.join(
                portage.root, settings['DEFAULT_ENV_FILE']),
-               encoding=_encodings['fs']), mode='r') as _file:
+               encoding=_encodings['fs']), mode='r',
+               encoding=_encodings['content']) as _file:
                for line in _file.readlines():
                        line = line.strip()
                        match = re.match("^export (ROOT)?PATH='([^']+)'", line)

diff --git a/pym/gentoolkit/revdep_rebuild/settings.py 
b/pym/gentoolkit/revdep_rebuild/settings.py
index 14f5bc8..589ea29 100644
--- a/pym/gentoolkit/revdep_rebuild/settings.py
+++ b/pym/gentoolkit/revdep_rebuild/settings.py
@@ -7,6 +7,8 @@ from __future__ import print_function
 import argparse
 import os
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 import re
 import glob
 
@@ -138,7 +140,7 @@ def parse_revdep_config(revdep_confdir):
 
        for _file in os.listdir(revdep_confdir):
                for line in open(_unicode_encode(os.path.join(revdep_confdir, 
_file),
-                               encoding=_encodings['fs'])):
+                               encoding=_encodings['fs']), 
encoding=_encodings['content']):
                        line = line.strip()
                        #first check for comment, we do not want to regex all 
lines
                        if not line.startswith('#'):

diff --git a/pym/gentoolkit/test/eclean/creator.py 
b/pym/gentoolkit/test/eclean/creator.py
index db0eba4..63bffd1 100644
--- a/pym/gentoolkit/test/eclean/creator.py
+++ b/pym/gentoolkit/test/eclean/creator.py
@@ -11,10 +11,13 @@ from __future__ import print_function
 
 import os
 import sys
+if sys.hexversion < 0x3000000:
+       from io import open
 import shutil
 import random
 
 import gentoolkit.pprinter as pp
+from portage import _encodings, _unicode_decode, _unicode_encode
 
 __version__= "0.0.1"
 __author__ = "Brian Dolbec"
@@ -54,7 +57,8 @@ def make_dist(path, files, clean_dict=None):
                size = random.randint(1000,5000)
                data = "0" * size
                filepath = os.path.join(path, file_)
-               with open(filepath, 'w', file_mode) as new_file:
+               with open(_unicode_encode(filepath, encoding=_encodings['fs']), 
'w', file_mode,
+                               encoding=_encodings['content']) as new_file:
                        new_file.write(data)
                if file_ not in clean_dict:
                        # it is included in a multifile target

Reply via email to