On 03-May-2008, Piotr Ożarowski wrote:
> [Ben Finney, 2008-05-03 15:22]
> > I wanted a command-line tool using 'python-chardet' to report on the
> > detected character encoding of arbitrary specified files. So I wrote
> > one.
>
> I didn't add such a tool to the python-chardet package, because
> /usr/bin/enca from the enca package does all this and even more.
Perhaps, but it doesn't use 'python-chardet' to do so, leaving Debian
currently without a command that uses this library.

> > The patch 'chardet-1.0.1_unit-test-scaffold.patch' adds unit test
> > support modules that I used to develop the program, and may be
> > useful for adding more unit tests in future.
>
> you forgot to attach the patch :-)

Argh. I didn't forget; the patches were eaten by bug #211808 of
'reportbug' <URL:http://bugs.debian.org/211808>. Thanks for letting me
know.

> please attach it and we'll consider adding it to the package

Both patches discussed in the initial bug report are now attached to
this message.

-- 
 \     "I don't care to belong to a club that accepts people like me |
  `\                 as members."  -- Groucho Marx                   |
_o__)                                                                |
Ben Finney <[EMAIL PROTECTED]>
=== added directory 'test' === added file 'test/minimock.py' --- test/minimock.py 1970-01-01 00:00:00 +0000 +++ test/minimock.py 2008-04-30 03:30:45 +0000 @@ -0,0 +1,279 @@ +# (c) 2006 Ian Bicking, Mike Beachy, and contributors +# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php +r""" +minimock is a simple library for doing Mock objects with doctest. +When using doctest, mock objects can be very simple. + +Here's an example of something we might test, a simple email sender:: + + >>> import smtplib + >>> def send_email(from_addr, to_addr, subject, body): + ... conn = smtplib.SMTP('localhost') + ... msg = 'To: %s\nFrom: %s\nSubject: %s\n\n%s' % ( + ... to_addr, from_addr, subject, body) + ... conn.sendmail(from_addr, [to_addr], msg) + ... conn.quit() + +Now we want to make a mock ``smtplib.SMTP`` object. We'll have to +inject our mock into the ``smtplib`` module:: + + >>> smtplib.SMTP = Mock('smtplib.SMTP') + >>> smtplib.SMTP.mock_returns = Mock('smtp_connection') + +Now we do the test:: + + >>> send_email('[EMAIL PROTECTED]', '[EMAIL PROTECTED]', + ... 'Hi there!', 'How is it going?') + Called smtplib.SMTP('localhost') + Called smtp_connection.sendmail( + '[EMAIL PROTECTED]', + ['[EMAIL PROTECTED]'], + 'To: [EMAIL PROTECTED]: [EMAIL PROTECTED]: Hi there!\n\nHow is it going?') + Called smtp_connection.quit() + +Voila! We've tested implicitly that no unexpected methods were called +on the object. We've also tested the arguments that the mock object +got. We've provided fake return calls (for the ``smtplib.SMTP()`` +constructor). These are all the core parts of a mock library. The +implementation is simple because most of the work is done by doctest. +""" + +__all__ = ["mock", "restore", "Mock"] + +import sys +import inspect + +# A list of mocked objects. Each item is a tuple of (original object, +# namespace dict, object name, and a list of object attributes). 
+# +mocked = [] + +def lookup_by_name(name, nsdicts): + """ + Look up an object by name from a sequence of namespace dictionaries. + Returns a tuple of (nsdict, object, attributes); nsdict is the + dictionary the name was found in, object is the base object the name is + bound to, and the attributes list is the chain of attributes of the + object that complete the name. + + >>> import os + >>> nsdict, name, attributes = lookup_by_name("os.path.isdir", + ... (locals(),)) + >>> name, attributes + ('os', ['path', 'isdir']) + >>> nsdict, name, attributes = lookup_by_name("os.monkey", (locals(),)) + Traceback (most recent call last): + ... + NameError: name 'os.monkey' is not defined + + """ + for nsdict in nsdicts: + attrs = name.split(".") + names = [] + + while attrs: + names.append(attrs.pop(0)) + obj_name = ".".join(names) + + if obj_name in nsdict: + attr_copy = attrs[:] + tmp = nsdict[obj_name] + try: + while attr_copy: + tmp = getattr(tmp, attr_copy.pop(0)) + except AttributeError: + pass + else: + return nsdict, obj_name, attrs + + raise NameError("name '%s' is not defined" % name) + +def mock(name, nsdicts=None, mock_obj=None, **kw): + """ + Mock the named object, placing a Mock instance in the correct namespace + dictionary. If no iterable of namespace dicts is provided, use + introspection to get the locals and globals of the caller of this + function. + + All additional keyword args are passed on to the Mock object + initializer. + + An example of how os.path.isfile is replaced: + + >>> import os + >>> os.path.isfile + <function isfile at ...> + >>> isfile_id = id(os.path.isfile) + >>> mock("os.path.isfile", returns=True) + >>> os.path.isfile + <Mock ... os.path.isfile> + >>> os.path.isfile("/foo/bar/baz") + Called os.path.isfile('/foo/bar/baz') + True + >>> mock_id = id(os.path.isfile) + >>> mock_id != isfile_id + True + + A second mock object will replace the first, but the original object + will be the one replaced with the replace() function. 
+ + >>> mock("os.path.isfile", returns=False) + >>> mock_id != id(os.path.isfile) + True + >>> restore() + >>> os.path.isfile + <function isfile at ...> + >>> isfile_id == id(os.path.isfile) + True + + """ + if nsdicts is None: + stack = inspect.stack() + try: + # stack[1][0] is the frame object of the caller to this function + globals_ = stack[1][0].f_globals + locals_ = stack[1][0].f_locals + nsdicts = (locals_, globals_) + finally: + del(stack) + + if mock_obj is None: + mock_obj = Mock(name, **kw) + + nsdict, obj_name, attrs = lookup_by_name(name, nsdicts) + + # Get the original object and replace it with the mock object. + tmp = nsdict[obj_name] + if not attrs: + original = tmp + nsdict[obj_name] = mock_obj + else: + for attr in attrs[:-1]: + tmp = getattr(tmp, attr) + original = getattr(tmp, attrs[-1]) + setattr(tmp, attrs[-1], mock_obj) + + mocked.append((original, nsdict, obj_name, attrs)) + +def restore(): + """ + Restore all mocked objects. + + """ + global mocked + + # Restore the objects in the reverse order of their mocking to assure + # the original state is retrieved. 
+ while mocked: + original, nsdict, name, attrs = mocked.pop() + if not attrs: + nsdict[name] = original + else: + tmp = nsdict[name] + for attr in attrs[:-1]: + tmp = getattr(tmp, attr) + setattr(tmp, attrs[-1], original) + return + +class Mock(object): + + def __init__(self, name, returns=None, returns_iter=None, + returns_func=None, raises=None, + outfile=None): + self.mock_name = name + self.mock_returns = returns + if returns_iter is not None: + returns_iter = iter(returns_iter) + self.mock_returns_iter = returns_iter + self.mock_returns_func = returns_func + self.mock_raises = raises + if outfile is None: + outfile = sys.stdout + self.mock_outfile = outfile + self.mock_attrs = {} + + def __repr__(self): + return '<Mock %s %s>' % (hex(id(self)), self.mock_name) + + def __call__(self, *args, **kw): + parts = [repr(a) for a in args] + parts.extend( + '%s=%r' % (items) for items in sorted(kw.items())) + msg = 'Called %s(%s)' % (self.mock_name, ', '.join(parts)) + if len(msg) > 80: + msg = 'Called %s(\n %s)' % ( + self.mock_name, ',\n '.join(parts)) + self.mock_outfile.write("%s\n" % msg) + return self._mock_return(*args, **kw) + + def _mock_return(self, *args, **kw): + if self.mock_raises is not None: + raise self.mock_raises + elif self.mock_returns is not None: + return self.mock_returns + elif self.mock_returns_iter is not None: + try: + return self.mock_returns_iter.next() + except StopIteration: + raise Exception("No more mock return values are present.") + elif self.mock_returns_func is not None: + return self.mock_returns_func(*args, **kw) + else: + return None + + def __getattr__(self, attr): + if attr not in self.mock_attrs: + if self.mock_name: + new_name = self.mock_name + '.' + attr + else: + new_name = attr + self.mock_attrs[attr] = Mock(new_name, + outfile=self.mock_outfile) + return self.mock_attrs[attr] + +__test__ = { + "mock" : + r""" + An additional test for mocking a function accessed directly (i.e. + not via object attributes). 
+ + >>> import os + >>> rename = os.rename + >>> orig_id = id(rename) + >>> mock("rename") + >>> mock_id = id(rename) + >>> mock("rename") + >>> mock_id != id(rename) + True + >>> restore() + >>> orig_id == id(rename) == id(os.rename) + True + + The example from the module docstring, done with the mock/restore + functions. + + >>> import smtplib + >>> def send_email(from_addr, to_addr, subject, body): + ... conn = smtplib.SMTP('localhost') + ... msg = 'To: %s\nFrom: %s\nSubject: %s\n\n%s' % ( + ... to_addr, from_addr, subject, body) + ... conn.sendmail(from_addr, [to_addr], msg) + ... conn.quit() + + >>> mock("smtplib.SMTP", returns=Mock('smtp_connection')) + >>> send_email('[EMAIL PROTECTED]', '[EMAIL PROTECTED]', + ... 'Hi there!', 'How is it going?') + Called smtplib.SMTP('localhost') + Called smtp_connection.sendmail( + '[EMAIL PROTECTED]', + ['[EMAIL PROTECTED]'], + 'To: [EMAIL PROTECTED]: [EMAIL PROTECTED]: Hi there!\n\nHow is it going?') + Called smtp_connection.quit() + >>> restore() + + """, +} + +if __name__ == '__main__': + import doctest + doctest.testmod(optionflags=doctest.ELLIPSIS) === added file 'test/scaffold.py' --- test/scaffold.py 1970-01-01 00:00:00 +0000 +++ test/scaffold.py 2008-04-30 03:47:46 +0000 @@ -0,0 +1,368 @@ +# -*- coding: utf-8 -*- + +# scaffold.py +# +# Copyright © 2007-2008 Ben Finney <[EMAIL PROTECTED]> +# This is free software; you may copy, modify and/or distribute this work +# under the terms of the GNU General Public License, version 2 or later. +# No warranty expressed or implied. See the file LICENSE for details. 
+ +""" Scaffolding for unit test modules +""" + +import unittest +import doctest +import logging +import os +import sys +import textwrap +from StringIO import StringIO +from minimock import ( + Mock, + mock, + restore as mock_restore, + ) + +test_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(test_dir) +if not test_dir in sys.path: + sys.path.insert(1, test_dir) +if not parent_dir in sys.path: + sys.path.insert(1, parent_dir) +bin_dir = os.path.join(parent_dir, "bin") + +# Disable all but the most critical logging messages +logging.disable(logging.CRITICAL) + + +def suite(module_name): + """ Create the test suite for named module """ + from sys import modules + loader = unittest.TestLoader() + suite = loader.loadTestsFromModule(modules[module_name]) + return suite + +def unittest_main(argv=None): + """ Mainline function for each unit test module """ + + from sys import argv as sys_argv + if not argv: + argv = sys_argv + + exitcode = None + try: + unittest.main(argv=argv, defaultTest='suite') + except SystemExit, e: + exitcode = e.code + + return exitcode + + +def make_module_from_file(module_name, file_name): + """ Make a new module object from the code in specified file """ + + from types import ModuleType + module = ModuleType(module_name) + + module_file = open(file_name, 'r') + exec module_file in module.__dict__ + + return module + + +class TestCase(unittest.TestCase): + """ Test case behaviour """ + + def failUnlessRaises(self, exc_class, func, *args, **kwargs): + """ Fail unless the function call raises the expected exception + + Fail the test if an instance of the exception class + ``exc_class`` is not raised when calling ``func`` with the + arguments ``*args`` and ``**kwargs``. 
+ + """ + + try: + super(TestCase, self).failUnlessRaises( + exc_class, func, *args, **kwargs) + except self.failureException: + exc_class_name = exc_class.__name__ + msg = ( + "Exception %(exc_class_name)s not raised" + " for function call:" + " func=%(func)r args=%(args)r kwargs=%(kwargs)r" + ) % vars() + raise self.failureException(msg) + + + def failIfIs(self, first, second, msg=None): + """ Fail if the two objects are identical + + Fail the test if ``first`` and ``second`` are identical, + as determined by the ``is`` operator. + + """ + + if first is second: + if msg is None: + msg = "%(first)r is %(second)r" % vars() + raise self.failureException(msg) + + def failUnlessIs(self, first, second, msg=None): + """ Fail unless the two objects are identical + + Fail the test unless ``first`` and ``second`` are + identical, as determined by the ``is`` operator. + + """ + + if first is not second: + if msg is None: + msg = "%(first)r is not %(second)r" % vars() + raise self.failureException(msg) + + assertIs = failUnlessIs + assertNotIs = failIfIs + + def failIfIn(self, first, second, msg=None): + """ Fail if the second object is in the first + + Fail the test if ``first`` contains ``second``, as + determined by the ``in`` operator. + + """ + + if second in first: + if msg is None: + msg = "%(second)r is in %(first)r" % vars() + raise self.failureException(msg) + + def failUnlessIn(self, first, second, msg=None): + """ Fail unless the second object is in the first + + Fail the test unless ``first`` contains ``second``, as + determined by the ``in`` operator. 
+ + """ + + if second not in first: + if msg is None: + msg = "%(second)r is not in %(first)r" % vars() + raise self.failureException(msg) + + assertIn = failUnlessIn + assertNotIn = failIfIn + + def failUnlessOutputCheckerMatch(self, want, got, msg=None): + """ Fail unless the specified string matches the expected + + Fail the test unless ``want`` matches ``got``, as + determined by a ``doctest.OutputChecker`` instance. This + is not an equality check, but a pattern match according to + the OutputChecker rules. + + """ + + checker = doctest.OutputChecker() + want = textwrap.dedent(want) + got = textwrap.dedent(got) + if not checker.check_output(want, got, doctest.ELLIPSIS): + if msg is None: + msg = ("Expected %(want)r, got %(got)r:" + "\n--- want: ---\n%(want)s" + "\n--- got: ---\n%(got)s") % vars() + raise self.failureException(msg) + + assertOutputCheckerMatch = failUnlessOutputCheckerMatch + + def failIfIsInstance(self, obj, classes): + """ Fail if the object is an instance of the specified classes + + Fail the test if the object ``obj`` is an instance of any + of ``classes``. + + """ + + if isinstance(obj, classes): + msg = "%(obj)r is an instance of one of %(classes)r" % vars() + raise self.failureException(msg) + + def failUnlessIsInstance(self, obj, classes): + """ Fail unless the object is an instance of the specified classes + + Fail the test unless the object ``obj`` is an instance of + any of ``classes``. + + """ + + if not isinstance(obj, classes): + msg = "%(obj)r is not an instance of any of %(classes)r" % vars() + raise self.failureException(msg) + + assertIsInstance = failUnlessIsInstance + assertNotIsInstance = failIfIsInstance + + def failUnlessFunctionInTraceback(self, traceback, function): + """ Fail if the function is not in the traceback + + Fail the test if the function ``function`` is not at any + of the levels in the traceback object ``traceback``. 
+ + """ + + func_in_traceback = False + expect_code = function.func_code + current_traceback = traceback + while current_traceback is not None: + if expect_code is current_traceback.tb_frame.f_code: + func_in_traceback = True + break + current_traceback = current_traceback.tb_next + + if not func_in_traceback: + msg = ("Traceback did not lead to original function" + " %(function)s" + ) % vars() + raise self.failureException(msg) + + assertFunctionInTraceback = failUnlessFunctionInTraceback + + +class Test_Exception(TestCase): + """ Test cases for exception classes """ + + def __init__(self, *args, **kwargs): + """ Set up a new instance """ + self.valid_exceptions = NotImplemented + super(Test_Exception, self).__init__(*args, **kwargs) + + def setUp(self): + """ Set up test fixtures """ + for exc_type, params in self.valid_exceptions.items(): + args = (None,) * params['min_args'] + params['args'] = args + instance = exc_type(*args) + params['instance'] = instance + + self.iterate_params = make_params_iterator( + default_params_dict = self.valid_exceptions + ) + + super(Test_Exception, self).setUp() + + def test_exception_instance(self): + """ Exception instance should be created """ + for key, params in self.iterate_params(): + instance = params['instance'] + self.failIfIs(None, instance) + + def test_exception_types(self): + """ Exception instances should match expected types """ + for key, params in self.iterate_params(): + instance = params['instance'] + for match_type in params['types']: + match_type_name = match_type.__name__ + fail_msg = ( + "%(instance)r is not an instance of" + " %(match_type_name)s" + ) % vars() + self.failUnless( + isinstance(instance, match_type), + msg=fail_msg) + + +class Test_ProgramMain(TestCase): + """ Test cases for program __main__ function + + Tests a module-level function named __main__ with behaviour + inspired by Guido van Rossum's post "Python main() functions" + <URL:http://www.artima.com/weblogs/viewpost.jsp?thread=4829>. 
+ + It expects: + * the program module has a __main__ function, that: + * accepts an 'argv' argument, defaulting to sys.argv + * instantiates a program application class + * calls the application's main() method, passing argv + * catches SystemExit and returns the error code + * the application behaviour is defined in a class, that: + * has an __init__() method accepting an 'argv' argument as + the commandline argument list to parse + * has a main() method responsible for running the program, + and returning on successful program completion + * raises SystemExit when an abnormal exit is required + """ + + def __init__(self, *args, **kwargs): + """ Set up a new instance """ + self.program_module = NotImplemented + self.application_class = NotImplemented + super(Test_ProgramMain, self).__init__(*args, **kwargs) + + def setUp(self): + """ Set up test fixtures """ + self.mock_outfile = StringIO() + + self.app_class_name = self.application_class.__name__ + self.mock_app = Mock("test_app", outfile=self.mock_outfile) + self.mock_app_class = Mock(self.app_class_name, + outfile=self.mock_outfile) + self.mock_app_class.mock_returns = self.mock_app + mock(self.app_class_name, mock_obj=self.mock_app_class, + nsdicts=[self.program_module.__dict__]) + + super(Test_ProgramMain, self).setUp() + + def tearDown(self): + """ Tear down test fixtures """ + mock_restore() + super(Test_ProgramMain, self).tearDown() + + def test_main_should_instantiate_app(self): + """ __main__() should instantiate application class """ + app_class_name = self.app_class_name + argv = ["foo", "bar"] + expect_mock_output = """\ + Called %(app_class_name)s(%(argv)r)... 
+ """ % vars() + self.program_module.__main__(argv) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + + def test_main_should_call_app_main(self): + """ __main__() should call the application main method """ + argv = ["foo", "bar"] + app_class_name = self.app_class_name + expect_mock_output = """\ + Called %(app_class_name)s(%(argv)r) + Called test_app.main() + """ % vars() + self.program_module.__main__(argv) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + + def test_main_no_argv_should_supply_sys_argv(self): + """ __main__() with no argv should supply sys.argv to application """ + sys_argv_test = ["foo", "bar"] + mock("sys.argv", mock_obj=sys_argv_test) + app_class_name = self.app_class_name + expect_mock_output = """\ + Called %(app_class_name)s(%(sys_argv_test)r) + Called test_app.main() + """ % vars() + self.program_module.__main__() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + + def test_main_should_return_none_on_success(self): + """ __main__() should return None when no SystemExit raised """ + expect_exit_code = None + exit_code = self.program_module.__main__() + self.failUnlessEqual(expect_exit_code, exit_code) + + def test_main_should_return_exit_code_on_system_exit(self): + """ __main__() should return application SystemExit code """ + expect_exit_code = object() + self.mock_app.main.mock_raises = SystemExit(expect_exit_code) + exit_code = self.program_module.__main__() + self.failUnlessEqual(expect_exit_code, exit_code) + === added file 'test/suite.py' --- test/suite.py 1970-01-01 00:00:00 +0000 +++ test/suite.py 2008-04-30 03:08:08 +0000 @@ -0,0 +1,59 @@ +# -*- coding:utf-8; -*- + +# test/suite.py +# Part of chardet, the Universal Encoding Detector. 
+# +# Copyright © 2008 Ben Finney <[EMAIL PROTECTED]> +# This is free software; you may copy, modify and/or distribute this work +# under the terms of the GNU General Public License, version 2 or later. +# No warranty expressed or implied. See the file COPYING for details. + +""" Unit test suite for chardet +""" + +import unittest +import sys +import os + + +def get_python_modules(file_list, file_suffix = '.py'): + """ Return a list of module names from a filename list """ + python_modules = [m[:m.rfind(file_suffix)] for m in file_list + if m.endswith(file_suffix)] + return python_modules + +def get_test_modules(module_list, module_prefix = 'test_'): + """ Return the list of modules that are named as test modules """ + test_modules = [m for m in module_list + if m.startswith(module_prefix)] + return test_modules + + +def suite(): + """ Create the test suite for this module """ + loader = unittest.TestLoader() + test_dir = os.path.dirname(__file__) + python_modules = get_python_modules(os.listdir(test_dir)) + module_list = get_test_modules(python_modules) + suite = loader.loadTestsFromNames(module_list) + + return suite + + +def __main__(argv=None): + """ Mainline function for this module """ + from sys import argv as sys_argv + if not argv: + argv = sys_argv + + exitcode = None + try: + unittest.main(argv=argv, defaultTest='suite') + except SystemExit, e: + exitcode = e.code + + return exitcode + +if __name__ == '__main__': + exitcode = __main__(sys.argv) + sys.exit(exitcode)
=== added directory 'bin' === added file 'bin/detect-encoding' --- bin/detect-encoding 1970-01-01 00:00:00 +0000 +++ bin/detect-encoding 2008-05-03 05:13:33 +0000 @@ -0,0 +1,159 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- + +# bin/detect-encoding +# Part of chardet, the Universal Encoding Detector. +# +# Copyright © 2008 Ben Finney <[EMAIL PROTECTED]> +# This is free software; you may copy, modify and/or distribute this work +# under the terms of the GNU General Public License, version 2 or later. +# No warranty expressed or implied. See the file COPYING for details. + +""" %prog [options] [file ...] + +Report heuristically-detected character encoding for each file. + +For every specified file (defaulting to stdin if no files are +specified), reads and determines the character encoding of the file +content. Reports the name and confidence level for each file's +detected character encoding. +""" + +import sys +import optparse +import chardet + + +class OptionParser(optparse.OptionParser, object): + """ Command-line parser for this program """ + + def __init__(self, *args, **kwargs): + """ Set up a new instance """ + super(OptionParser, self).__init__(*args, **kwargs) + + global __doc__ + self.usage = __doc__.strip() + + +def detect_encoding(in_file): + """ Detect encoding of text in `in_file` + + Parameters + in_file + Opened file object to read and examine. + + Return value + The mapping as returned by `chardet.detect`. + + """ + in_data = in_file.read() + params = chardet.detect(in_data) + return params + + +def report_file_encoding(in_file, encoding_params): + """ Return a report of the file's encoding + + Parameters + in_file + File object being reported. Should have an appropriate + `name` attribute. + + encoding_params + Mapping as returned by `detect_encoding` on the file's + data. + + Return value + The report is a single line of text showing filename, + detected encoding, and detection confidence. 
+ + """ + file_name = in_file.name + encoding_name = encoding_params['encoding'] + confidence = encoding_params['confidence'] + report = ( + "%(file_name)s: %(encoding_name)s" + " (confidence: %(confidence)0.2f)" + ) % vars() + return report + + +def process_file(in_file): + """ Process a single file + + Parameters + in_file + Opened file object to read and examine. + + Return value + None. + + Reads the file contents, detects the encoding, and writes a + report line to stdout. + """ + encoding_params = detect_encoding(in_file) + encoding_report = report_file_encoding(in_file, encoding_params) + message = "%(encoding_report)s\n" % vars() + sys.stdout.write(message) + + +class DetectEncodingApp(object): + """ Application behaviour for 'detect-encoding' program """ + + def __init__(self, argv): + """ Set up a new instance """ + self._parse_commandline(argv) + + def _parse_commandline(self, argv): + """ Parse command-line arguments """ + option_parser = OptionParser() + (options, args) = option_parser.parse_args(argv[1:]) + self.file_names = args + + def _emit_file_error(self, file_name, error): + """ Emit an error message regarding file processing """ + error_name = error.__class__.__name__ + message = ( + "%(file_name)s: %(error_name)s: %(error)s\n" + ) % vars() + sys.stderr.write(message) + + def _process_all_files(self, file_names): + """ Process all files in list """ + if not len(file_names): + file_names = [None] + for file_name in file_names: + try: + if file_name is None: + file_name = sys.stdin.name + in_file = sys.stdin + else: + in_file = open(file_name) + process_file(in_file) + except IOError, exc: + self._emit_file_error(file_name, exc) + + def main(self): + """ Main entry point for application """ + self._process_all_files(self.file_names) + + +def __main__(argv=None): + """ Mainline code for this program """ + + from sys import argv as sys_argv + if argv is None: + argv = sys_argv + + app = DetectEncodingApp(argv) + exitcode = None + try: + app.main() 
+ except SystemExit, e: + exitcode = e.code + + return exitcode + +if __name__ == "__main__": + exitcode = __main__(argv=sys.argv) + sys.exit(exitcode) === added file 'test/test_detect_encoding.py' --- test/test_detect_encoding.py 1970-01-01 00:00:00 +0000 +++ test/test_detect_encoding.py 2008-05-03 05:09:39 +0000 @@ -0,0 +1,558 @@ +# -*- coding: utf-8; -*- + +# test/test_detect_encoding.py +# Part of chardet, the Universal Encoding Detector. +# +# Copyright © 2008 Ben Finney <[EMAIL PROTECTED]> +# This is free software; you may copy, modify and/or distribute this work +# under the terms of the GNU General Public License, version 2 or later. +# No warranty expressed or implied. See the file COPYING for details. + +""" Unit test suite for the 'detect-encoding' command-line program +""" + +import __builtin__ +import sys +import os +import stat +from StringIO import StringIO + +import scaffold +from scaffold import TestCase +from minimock import ( + Mock, + mock, + restore as mock_restore, + ) +import chardet + + +module_name = 'detect_encoding' +module_file_under_test = os.path.join(scaffold.bin_dir, 'detect-encoding') +detect_encoding = scaffold.make_module_from_file( + module_name, module_file_under_test + ) + +class Test_ProgramFile(TestCase): + """ Test cases for executable program """ + + def test_program_is_executable(self): + """ Program file should be executable """ + file_mode = os.stat(module_file_under_test).st_mode + exec_bit = stat.S_IEXEC + self.failUnlessEqual(exec_bit, (file_mode & exec_bit)) + + +class Test_OptionParser(TestCase): + """ Test cases for OptionParser class """ + + def setUp(self): + """ Set up test fixtures """ + self.instance = detect_encoding.OptionParser() + + def test_usage_contains_module_docstring(self): + """ Usage message should contain the module docstring + + To reduce unnecessary duplication, the module docstring + for the program should be used for the program usage + message. 
+ + """ + instance = self.instance + module_docstring_stripped = detect_encoding.__doc__.strip() + self.failUnlessIn(instance.usage, module_docstring_stripped) + + def test_usage_contains_program_name_placeholder(self): + """ Usage message should contain program name placeholder + + The OptionParser usage message generation will replace the + '%prog' placeholder with the name of the running program, + so this placeholder should be in the usage message string. + + """ + instance = self.instance + progname_placeholder = "%prog" + self.failUnlessIn(instance.usage, progname_placeholder) + + +def setup_file_encoding_fixture(testcase): + """ Set up test parameters for files with encodings """ + testcase.valid_file_params = { + "foo.txt": dict( + encoding = "spam.eggs", + confidence = 1.0, + ), + "bar": dict( + encoding = "beans.tofu", + confidence = 0.333333333, + ), + "baz.dat": dict( + encoding = "bacon.muffin", + confidence = 0.666666667, + ), + } + + for (file_name, params) in testcase.valid_file_params.items(): + params['file_name'] = file_name + instance = StringIO() + instance.name = file_name + params['instance'] = instance + encoding_params = dict( + encoding=params['encoding'], + confidence=params['confidence'], + ) + params['encoding_params'] = encoding_params + + +class Test_detect_encoding(TestCase): + """ Test cases for ``detect_encoding`` function """ + + def setUp(self): + """ Set up test fixtures """ + self.mock_outfile = StringIO() + + mock( + 'chardet.detect', + outfile=self.mock_outfile) + + self.in_data = object() + self.in_file = Mock("file", outfile=self.mock_outfile) + self.in_file.read.mock_returns = self.in_data + + def test_reads_file_contents(self): + """ detect_encoding should read contents of file """ + expect_mock_output = """\ + Called file.read() + ... 
+ """ % vars() + dummy = detect_encoding.detect_encoding(self.in_file) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + def test_uses_chardet_detect(self): + """ detect_encoding should use chardet.detect() """ + expect_mock_output = """\ + ... + Called chardet.detect(%(in_data)r) + """ % vars(self) + dummy = detect_encoding.detect_encoding(self.in_file) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + def test_returns_expected_parameters(self): + """ detect_encoding should return expected encoding parameters """ + expect_encoding_params = object() + chardet.detect.mock_returns = expect_encoding_params + encoding_params = detect_encoding.detect_encoding(self.in_file) + self.failUnlessIs(expect_encoding_params, encoding_params) + + +class Test_report_file_encoding(TestCase): + """ Test cases for ``report_file_encoding`` function """ + + def setUp(self): + """ Set up test fixtures """ + self.mock_outfile = StringIO() + + setup_file_encoding_fixture(self) + + def test_report_contains_filename(self): + """ report_file_encoding() result should contain filename + + The report text returned from report_file_encoding() should + contain the name of the supplied file. + + """ + for params in self.valid_file_params.values(): + file_name = params['file_name'] + in_file = params['instance'] + encoding_params = params['encoding_params'] + args = dict( + in_file=in_file, + encoding_params=encoding_params, + ) + report = detect_encoding.report_file_encoding(**args) + self.failUnlessIn(report, in_file.name) + + def test_report_contains_encoding(self): + """ report_file_encoding() result should contain encoding + + The report text returned from report_file_encoding() + should contain the name of the detected encoding. 
+ + """ + for params in self.valid_file_params.values(): + in_file = params['instance'] + encoding_params = params['encoding_params'] + encoding_name = encoding_params['encoding'] + args = dict( + in_file=in_file, + encoding_params=encoding_params, + ) + report = detect_encoding.report_file_encoding(**args) + self.failUnlessIn(report, encoding_name) + + def test_report_contains_confidence_score(self): + """ report_file_encoding() result should contain confidence score + + The report text returned from report_file_encoding() + should contain the confidence score of the detection. + + """ + for params in self.valid_file_params.values(): + in_file = params['instance'] + encoding_params = params['encoding_params'] + confidence = encoding_params['confidence'] + confidence_text = "%(confidence)0.2f" % vars() + args = dict( + in_file=in_file, + encoding_params=encoding_params, + ) + report = detect_encoding.report_file_encoding(**args) + self.failUnlessIn(report, confidence_text) + + +class Test_process_file(TestCase): + """ Test cases for process_file function """ + + def setUp(self): + """ Set up test fixtures """ + + self.mock_outfile = StringIO() + + self.app_class = detect_encoding.DetectEncodingApp + setup_DetectEncodingApp_fixture(self) + app_params = self.valid_app_params['no files'] + self.app_instance = app_params['instance'] + + mock( + 'detect_encoding.detect_encoding', + outfile=self.mock_outfile) + detect_encoding.detect_encoding.mock_returns_iter = ( + f['encoding_params'] + for f in self.valid_file_params.values() + ) + mock( + 'detect_encoding.report_file_encoding', + outfile=self.mock_outfile) + fake_report_text = str(object()) + detect_encoding.report_file_encoding.mock_returns = ( + fake_report_text) + mock( + 'sys.stdout', + outfile=self.mock_outfile) + + def tearDown(self): + """ Tear down test fixtures """ + mock_restore() + + def test_uses_detect_encoding_on_file(self): + """ Should call detect_encoding with file parameter """ + for file_params in 
self.valid_file_params.values(): + in_file = file_params['instance'] + encoding_params = file_params['encoding_params'] + expect_mock_output = """\ + Called detect_encoding.detect_encoding( + %(instance)r) + ... + """ % file_params + detect_encoding.process_file(in_file) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + self.mock_outfile.truncate(0) + + def test_uses_report_file_encoding_on_parameters(self): + """ Should call report_file_encoding with report parameters """ + for file_params in self.valid_file_params.values(): + in_file = file_params['instance'] + expect_mock_output = """\ + ... + Called detect_encoding.report_file_encoding( + %(instance)r, + %(encoding_params)r) + ... + """ % file_params + detect_encoding.process_file(in_file) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + self.mock_outfile.truncate(0) + + def test_writes_report_to_stdout(self): + """ Should write report for file to stdout """ + for file_params in self.valid_file_params.values(): + fake_report_text = str(object()) + detect_encoding.report_file_encoding.mock_returns = ( + fake_report_text) + in_file = file_params['instance'] + expect_mock_output = """\ + ... + Called sys.stdout.write('...%(fake_report_text)s...') + """ % vars() + detect_encoding.process_file(in_file) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + self.mock_outfile.truncate(0) + + +def setup_DetectEncodingApp_fixture(testcase): + """ Set up a DetectEncodingApp test fixture on the test case + + `testcase` + The unit test case to which the fixture should be applied. + Must have an attribute `app_class` which is the type to + instantiate for the application instances. + + The fixture is applied as the `valid_apps` attribute, and is a + dict of parameter dicts to use in the test case. 
+ + """ + + setup_file_encoding_fixture(testcase) + + testcase.valid_app_params = { + 'no files': dict( + file_names = [], + ), + 'one file': dict( + file_names = [ + "foo.txt", + ], + ), + 'three files': dict( + file_names = [ + "foo.txt", "bar", "baz.dat", + ], + ), + } + + for params in testcase.valid_app_params.values(): + argv = [] + cmd_args = params.get('cmd_args', ["foo_prog"]) + file_names = params['file_names'] + in_files = [] + for file_name in file_names: + in_file = StringIO() + in_file.name = file_name + in_files.append(in_file) + else: + in_file = StringIO() + in_file.name = "<stdin>" + in_files = [in_file] + params['in_files'] = in_files + cmd_args.extend(file_names) + argv.extend(cmd_args) + params['argv'] = argv + args = dict( + argv=argv + ) + params['args'] = args + instance = testcase.app_class(**args) + params['instance'] = instance + + +class Test_DetectEncodingApp_init(TestCase): + """ Test cases for DetectCodingApp class initialisation """ + + def setUp(self): + """ Set up test fixtures """ + + self.mock_outfile = StringIO() + + self.app_class = detect_encoding.DetectEncodingApp + setup_DetectEncodingApp_fixture(self) + + def tearDown(self): + """ Tear down test fixtures """ + mock_restore() + + def test_requires_argv(self): + """ Shoudl require argv parameter """ + args = dict() + self.failUnlessRaises(TypeError, self.app_class, **args) + + def test_parses_args(self): + """ Should parse command-line arguments """ + for params in self.valid_app_params.values(): + args = params['args'] + argv = args['argv'] + + argv_to_parse = argv[1:] + args_return = argv_to_parse + mock_option_parser = Mock( + "OptionParser", + outfile=self.mock_outfile) + stub_parse_args_return = (object(), args_return) + mock_option_parser.parse_args.mock_returns = ( + stub_parse_args_return) + mock( + 'detect_encoding.OptionParser', + returns=mock_option_parser, + outfile=self.mock_outfile) + + expect_mock_output = """\ + Called detect_encoding.OptionParser() + Called 
OptionParser.parse_args(%(argv_to_parse)r) + """ % vars() + instance = self.app_class(**args) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + self.mock_outfile.truncate(0) + + def test_stores_specified_args(self): + """ Should store specified command-line arguments """ + for params in self.valid_app_params.values(): + instance = params['instance'] + app_args = params['args'] + expect_file_names = params['file_names'] + self.failUnlessEqual(expect_file_names, instance.file_names) + + +class Test_DetectEncodingApp_main(TestCase): + """ Test cases for DetectEncodingApp.main method """ + + def setUp(self): + """ Set up test fixtures """ + + self.mock_outfile = StringIO() + + self.app_class = detect_encoding.DetectEncodingApp + setup_DetectEncodingApp_fixture(self) + + mock( + 'detect_encoding.process_file', + outfile=self.mock_outfile) + fake_file = self.valid_file_params['foo.txt']['instance'] + mock( + '__builtin__.open', + returns=fake_file, + outfile=self.mock_outfile) + + def tearDown(self): + """ Tear down test fixtures """ + mock_restore() + + def test_with_no_files_processes_stdin(self): + """ With no files specified, should process sys.stdin """ + params = self.valid_app_params['no files'] + instance = params['instance'] + in_file = Mock("sys.stdin", outfile=self.mock_outfile) + mock( + 'sys.stdin', mock_obj=in_file) + expect_mock_output = """\ + Called detect_encoding.process_file(%(in_file)r) + """ % vars() + instance.main() + mock_restore() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + def test_processes_specified_files(self): + """ With filenames, should process each file in turn """ + params = self.valid_app_params['three files'] + file_names = params['file_names'] + instance = params['instance'] + in_files = [] + expect_mock_output_segments = [] + for file_name in file_names: + in_file = self.valid_file_params[file_name]['instance'] + in_files.append(in_file) + 
expect_mock_output_segments.append( + ("""\ + Called __builtin__.open(%(file_name)r) + Called detect_encoding.process_file(%(in_file)r)""" + ) % vars() + ) + mock( + '__builtin__.open', returns_iter=in_files, + outfile=self.mock_outfile) + expect_mock_output = "\n".join(expect_mock_output_segments) + "\n" + instance.main() + mock_restore() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + def test_emits_message_on_open_ioerror(self): + """ IOError from open should cause error message """ + params = self.valid_app_params['one file'] + instance = params['instance'] + error_instance = IOError("Badness!") + error_name = error_instance.__class__.__name__ + __builtin__.open.mock_raises = error_instance + mock( + 'sys.stderr', + outfile=self.mock_outfile) + expect_mock_output = """\ + ... + Called sys.stderr.write('...%(error_name)s...%(error_instance)s...') + """ % vars() + instance.main() + mock_restore() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + def test_continues_to_next_file_after_ioerror(self): + """ Should proceed to next file after IOError """ + params = self.valid_app_params['three files'] + instance = params['instance'] + file_names = params['file_names'] + error_instance = IOError("Badness!") + mock( + 'sys.stderr', + outfile=self.mock_outfile) + error_file_name = file_names[1] + def stub_open(file_name, *args, **kwargs): + if file_name == error_file_name: + raise error_instance + else: + return Mock("file", outfile=self.mock_outfile) + mock( + '__builtin__.open', mock_obj=stub_open, + outfile=self.mock_outfile) + expect_mock_output = """\ + Called detect_encoding.process_file(...) + Called sys.stderr.write(...) + Called detect_encoding.process_file(...) 
+ """ % vars() + instance.main() + mock_restore() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + def test_emits_message_on_detect_encoding_ioerror(self): + """ IOError from detect_encoding should cause error message """ + params = self.valid_app_params['one file'] + instance = params['instance'] + error_instance = IOError("Badness!") + error_name = error_instance.__class__.__name__ + detect_encoding.process_file.mock_raises = error_instance + mock( + 'sys.stderr', + outfile=self.mock_outfile) + expect_mock_output = """\ + ... + Called sys.stderr.write('...%(error_name)s...%(error_instance)s...') + """ % vars() + instance.main() + mock_restore() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue() + ) + + +class Test_ProgramMain(scaffold.Test_ProgramMain): + """ Test cases for program __main__ function """ + + def setUp(self): + """ Set up a new instance """ + self.program_module = detect_encoding + self.application_class = detect_encoding.DetectEncodingApp + super(Test_ProgramMain, self).setUp()
signature.asc
Description: Digital signature