Hello community, here is the log from the commit of package python-sas7bdat for openSUSE:Factory checked in at 2018-12-24 11:48:02 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-sas7bdat (Old) and /work/SRC/openSUSE:Factory/.python-sas7bdat.new.28833 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-sas7bdat" Mon Dec 24 11:48:02 2018 rev:3 rq:660761 version:2.2.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-sas7bdat/python-sas7bdat.changes 2018-07-31 16:01:56.179804991 +0200 +++ /work/SRC/openSUSE:Factory/.python-sas7bdat.new.28833/python-sas7bdat.changes 2018-12-24 11:48:03.489098735 +0100 @@ -1,0 +2,17 @@ +Sat Dec 22 05:24:22 UTC 2018 - Todd R <toddrme2...@gmail.com> + +- Update to 2.2.1 + * note how to install + * suggest use of 'skip_header' + * add documentation around getting column information + * use more descriptive variable name for SAS7BDAT instance in README + * bump version to fix pypi documentation formatting + * revert setup.py + * use long_description_content_type + * rewrite RDCDecompressor + * Add the ability to use file handles, in addition to supplying a path. If the file being accessed is not within the file system (for example, if it is compressed into a zip or tar file) it cannot be opened from the supplied path. This change adds the ability to supply a file handle, and for the module to use that, rather than opening the file itself. + * fix dist + * add encoding argument to convert_file() + * add license; update build script + +------------------------------------------------------------------- Old: ---- LICENSE sas7bdat-2.0.7.tar.gz New: ---- sas7bdat-2.2.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-sas7bdat.spec ++++++ --- /var/tmp/diff_new_pack.xnhRMg/_old 2018-12-24 11:48:04.057098237 +0100 +++ /var/tmp/diff_new_pack.xnhRMg/_new 2018-12-24 11:48:04.061098233 +0100 @@ -18,21 +18,18 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-sas7bdat -Version: 2.0.7 +Version: 2.2.1 Release: 0 -# For the license -%define tag da1faa90d0b15c2c97a2a8eb86c91c58081bdd86 Summary: A sas7bdat file reader for Python License: MIT Group: Development/Languages/Python Url: https://bitbucket.org/jaredhobbs/sas7bdat Source: https://files.pythonhosted.org/packages/source/s/sas7bdat/sas7bdat-%{version}.tar.gz -Source10: https://bitbucket.org/jaredhobbs/sas7bdat/raw/%{tag}/LICENSE BuildRequires: %{python_module devel} -BuildRequires: %{python_module six} +BuildRequires: %{python_module six >= 1.8.0} BuildRequires: fdupes BuildRequires: python-rpm-macros -Requires: python-six +Requires: python-six >= 1.8.0 BuildArch: noarch %python_subpackages @@ -50,7 +47,6 @@ %prep %setup -q -n sas7bdat-%{version} -cp %{SOURCE10} . sed -i 's/\r$//' README.md %build ++++++ sas7bdat-2.0.7.tar.gz -> sas7bdat-2.2.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/LICENSE new/sas7bdat-2.2.1/LICENSE --- old/sas7bdat-2.0.7/LICENSE 1970-01-01 01:00:00.000000000 +0100 +++ new/sas7bdat-2.2.1/LICENSE 2018-05-24 21:58:10.000000000 +0200 @@ -0,0 +1,19 @@ +Copyright (c) 2015-2018 Jared Hobbs + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/PKG-INFO new/sas7bdat-2.2.1/PKG-INFO --- old/sas7bdat-2.0.7/PKG-INFO 2016-01-07 02:29:27.000000000 +0100 +++ new/sas7bdat-2.2.1/PKG-INFO 2018-11-05 06:24:29.000000000 +0100 @@ -1,12 +1,58 @@ Metadata-Version: 1.1 Name: sas7bdat -Version: 2.0.7 +Version: 2.2.1 Summary: A sas7bdat file reader for Python Home-page: https://bitbucket.org/jaredhobbs/sas7bdat Author: Jared Hobbs Author-email: ja...@pyhacker.com License: MIT -Description: UNKNOWN +Description: sas7bdat.py + =========== + + This module will read sas7bdat files using pure Python (2.6+, 3+). No + SAS software required! The module started out as a port of the R script + of the same name found here: https://github.com/BioStatMatt/sas7bdat but + has since been completely rewritten. + + Also included with this library is a simple command line script, + ``sas7bdat_to_csv``, which converts sas7bdat files to csv files. It will + also print out header information and meta data using the ``--header`` + option and it will batch convert files as well. Use the ``--help`` + option for more information. + + As is, I’ve successfully tested the script almost three hundred sample + files I found on the internet. For the most part, it works well. We can + now read compressed files! + + I’m sure there are more issues that I haven’t come across yet. Please + let me know if you come across a data file that isn’t supported and I’ll + see if I can add support for the file. + + Usage + ===== + + To create a sas7bdat object, simply pass the constructor a file path. + The object is iterable so you can read the contents like this: + + :: + + #!python + from sas7bdat import SAS7BDAT + with SAS7BDAT('foo.sas7bdat') as f: + for row in f: + print row + + The values in each row will be a ``string``, ``float``, + ``datetime.date``, ``datetime.datetime``, or ``datetime.time`` instance. + + If you’d like to get a pandas DataFrame, use the ``to_data_frame`` + method: + + :: + + #!python + df = f.to_data_frame() + Keywords: sas,sas7bdat,csv,converter Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable @@ -16,6 +62,7 @@ Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 2.6 +Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Text Processing Classifier: Topic :: Utilities diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/sas7bdat.py new/sas7bdat-2.2.1/sas7bdat.py --- old/sas7bdat-2.0.7/sas7bdat.py 2016-01-07 02:27:26.000000000 +0100 +++ new/sas7bdat-2.2.1/sas7bdat.py 2018-11-05 05:56:25.000000000 +0100 @@ -13,6 +13,7 @@ import platform import struct import sys +from codecs import open from datetime import datetime, timedelta import six @@ -202,186 +203,96 @@ class RDCDecompressor(Decompressor): """ Decompresses data using the Ross Data Compression algorithm + http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/ + articles/CUJ/1992/9210/ross/ross.htm """ - def bytes_to_bits(self, src, offset, length): - result = [0] * (length * 8) - for i in xrange(length): - b = src[offset + i] - for bit in xrange(8): - result[8 * i + (7 - bit)] = 0 if ((b & (1 << bit)) == 0) else 1 - return result - - def ensure_capacity(self, src, capacity): - if capacity >= len(src): - new_len = max(capacity, 2 * len(src)) - src.extend([0] * (new_len - len(src))) - return src - - def is_short_rle(self, first_byte_of_cb): - return first_byte_of_cb in set([0x00, 0x01, 0x02, 0x03, 0x04, 0x05]) - - def is_single_byte_marker(self, first_byte_of_cb): - return first_byte_of_cb in set([0x02, 0x04, 0x06, 0x08, 0x0A]) - - def is_two_bytes_marker(self, double_bytes_cb): - return len(double_bytes_cb) == 2 and\ - ((double_bytes_cb[0] >> 4) & 0xF) > 2 - - def is_three_bytes_marker(self, three_byte_marker): - flag = three_byte_marker[0] >> 4 - return len(three_byte_marker) == 3 and (flag & 0xF) in set([1, 2]) - - def get_length_of_rle_pattern(self, first_byte_of_cb): - if first_byte_of_cb <= 0x05: - return first_byte_of_cb + 3 - return 0 - - def get_length_of_one_byte_pattern(self, first_byte_of_cb): - return first_byte_of_cb + 14\ - if self.is_single_byte_marker(first_byte_of_cb) else 0 - - def get_length_of_two_bytes_pattern(self, double_bytes_cb): - return (double_bytes_cb[0] >> 4) & 0xF - - def get_length_of_three_bytes_pattern(self, p_type, three_byte_marker): - if p_type == 1: - return 19 + (three_byte_marker[0] & 0xF) +\ - (three_byte_marker[1] * 16) - elif p_type == 2: - return three_byte_marker[2] + 16 - return 0 - - def get_offset_for_one_byte_pattern(self, first_byte_of_cb): - if first_byte_of_cb == 0x08: - return 24 - elif first_byte_of_cb == 0x0A: - return 40 - return 0 - - def get_offset_for_two_bytes_pattern(self, double_bytes_cb): - return 3 + (double_bytes_cb[0] & 0xF) + (double_bytes_cb[1] * 16) - - def get_offset_for_three_bytes_pattern(self, triple_bytes_cb): - return 3 + (triple_bytes_cb[0] & 0xF) + (triple_bytes_cb[1] * 16) - - def clone_byte(self, b, length): - return [b] * length - def decompress_row(self, offset, length, result_length, page): - b = self.to_ord - c = self.to_chr - src_row = [b(x) for x in page[offset:offset + length]] + src_row = [self.to_ord(x) for x in page[offset:offset + length]] out_row = [0] * result_length + ctrl_mask = 0 + ctrl_bits = 0 src_offset = 0 out_offset = 0 + + # process each item in src_row while src_offset < (len(src_row) - 2): - prefix_bits = self.bytes_to_bits(src_row, src_offset, 2) - src_offset += 2 - for bit_index in xrange(16): - if src_offset >= len(src_row): - break - if prefix_bits[bit_index] == 0: - out_row = self.ensure_capacity(out_row, out_offset) - out_row[out_offset] = src_row[src_offset] - src_offset += 1 - out_offset += 1 - continue - marker_byte = src_row[src_offset] - try: - next_byte = src_row[src_offset + 1] - except IndexError: - break - if self.is_short_rle(marker_byte): - length = self.get_length_of_rle_pattern(marker_byte) - out_row = self.ensure_capacity( - out_row, out_offset + length - ) - pattern = self.clone_byte(next_byte, length) - out_row[out_offset:out_offset + length] = pattern - out_offset += length - src_offset += 2 - continue - elif self.is_single_byte_marker(marker_byte) and not\ - ((next_byte & 0xF0) == ((next_byte << 4) & 0xF0)): - length = self.get_length_of_one_byte_pattern(marker_byte) - out_row = self.ensure_capacity( - out_row, out_offset + length - ) - back_offset = self.get_offset_for_one_byte_pattern( - marker_byte - ) - start = out_offset - back_offset - end = start + length - out_row[out_offset:out_offset + length] =\ - out_row[start:end] - src_offset += 1 - out_offset += length - continue - two_bytes_marker = src_row[src_offset:src_offset + 2] - if self.is_two_bytes_marker(two_bytes_marker): - length = self.get_length_of_two_bytes_pattern( - two_bytes_marker - ) - out_row = self.ensure_capacity( - out_row, out_offset + length - ) - back_offset = self.get_offset_for_two_bytes_pattern( - two_bytes_marker - ) - start = out_offset - back_offset - end = start + length - out_row[out_offset:out_offset + length] =\ - out_row[start:end] - src_offset += 2 - out_offset += length - continue - three_bytes_marker = src_row[src_offset:src_offset + 3] - if self.is_three_bytes_marker(three_bytes_marker): - p_type = (three_bytes_marker[0] >> 4) & 0x0F - back_offset = 0 - if p_type == 2: - back_offset = self.get_offset_for_three_bytes_pattern( - three_bytes_marker - ) - length = self.get_length_of_three_bytes_pattern( - p_type, three_bytes_marker - ) - out_row = self.ensure_capacity( - out_row, out_offset + length - ) - if p_type == 1: - pattern = self.clone_byte( - three_bytes_marker[2], length - ) - else: - start = out_offset - back_offset - end = start + length - pattern = out_row[start:end] - out_row[out_offset:out_offset + length] = pattern - src_offset += 3 - out_offset += length - continue - else: - self.parent.logger.error( - 'unknown marker %s at offset %s', src_row[src_offset], - src_offset - ) - break - return b''.join([c(x) for x in out_row]) + # get new load of control bits if needed + ctrl_mask = ctrl_mask >> 1 + if ctrl_mask == 0: + ctrl_bits = (src_row[src_offset] << 8) +\ + src_row[src_offset + 1] + src_offset += 2 + ctrl_mask = 0x8000 + + # just copy this char if control bit is zero + if (ctrl_bits & ctrl_mask) == 0: + out_row[out_offset] = src_row[src_offset] + out_offset += 1 + src_offset += 1 + continue + + # undo the compression code + cmd = (src_row[src_offset] >> 4) & 0x0F + cnt = src_row[src_offset] & 0x0F + src_offset += 1 + + if cmd == 0: # short rle + cnt += 3 + for k in xrange(cnt): + out_row[out_offset + k] = src_row[src_offset] + out_offset += cnt + src_offset += 1 + elif cmd == 1: # long rle + cnt += src_row[src_offset] << 4 + cnt += 19 + src_offset += 1 + for k in xrange(cnt): + out_row[out_offset + k] = src_row[src_offset] + out_offset += cnt + src_offset += 1 + elif cmd == 2: # long pattern + ofs = cnt + 3 + ofs += src_row[src_offset] << 4 + src_offset += 1 + cnt = src_row[src_offset] + src_offset += 1 + cnt += 16 + for k in xrange(cnt): + out_row[out_offset + k] = out_row[out_offset - ofs + k] + out_offset += cnt + elif cmd >= 3 and cmd <= 15: # short pattern + ofs = cnt + 3 + ofs += src_row[src_offset] << 4 + src_offset += 1 + for k in xrange(cmd): + out_row[out_offset + k] = out_row[out_offset - ofs + k] + out_offset += cmd + else: + self.parent.logger.error( + 'unknown marker %s at offset %s', src_row[src_offset], + src_offset + ) + break + return b''.join([self.to_chr(x) for x in out_row]) class SAS7BDAT(object): """ SAS7BDAT(path[, log_level[, extra_time_format_strings[, \ -extra_date_time_format_strings[, extra_date_format_strings]]]]) -> \ +extra_date_time_format_strings[, extra_date_format_strings[, \ +fh=fh]]]]]) -> \ SAS7BDAT object - Open a SAS7BDAT file. The log level are standard logging levels - (defaults to logging.INFO). + Open a SAS7BDAT file or use an existing file handle. + The log level are standard logging levels (defaults to logging.INFO). If your sas7bdat file uses non-standard format strings for time, datetime, or date values, pass those strings into the constructor using the appropriate kwarg. + + The file will be opened from the path supplied, unless a file handle + is supplied. The file handle should be opened in binary mode for + correct operation. """ _open_files = [] RLE_COMPRESSION = b'SASYZCRL' @@ -410,7 +321,8 @@ skip_header=False, encoding='utf8', encoding_errors='ignore', - align_correction=True): + align_correction=True, + fh=None): """ x.__init__(...) initializes x; see help(type(x)) for signature """ @@ -433,7 +345,7 @@ self.encoding = encoding self.encoding_errors = encoding_errors self.align_correction = align_correction - self._file = open(self.path, 'rb') + self._file = fh or open(self.path, 'rb') self._open_files.append(self._file) self.cached_page = None self.current_page_type = None @@ -745,7 +657,8 @@ ).decode(self.encoding, self.encoding_errors)) return row_elements - def convert_file(self, out_file, delimiter=',', step_size=100000): + def convert_file(self, out_file, delimiter=',', step_size=100000, + encoding=None): """ convert_file(out_file[, delimiter[, step_size]]) -> None @@ -761,7 +674,7 @@ if out_file == '-': out_f = sys.stdout else: - out_f = open(out_file, 'w') + out_f = open(out_file, 'w', encoding=encoding) out = csv.writer(out_f, lineterminator='\n', delimiter=delimiter) i = 0 for i, line in enumerate(self, 1): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sas7bdat-2.0.7/setup.py new/sas7bdat-2.2.1/setup.py --- old/sas7bdat-2.0.7/setup.py 2016-01-07 02:27:50.000000000 +0100 +++ new/sas7bdat-2.2.1/setup.py 2018-11-05 06:24:05.000000000 +0100 @@ -4,6 +4,16 @@ import sys from distutils.core import setup +try: + from pypandoc import convert_file +except ImportError: + print('warning: pypandoc not found, could not convert Markdown to RST.') + + def convert_file(filename, to): + with open(filename, 'r') as f: + data = f.read() + return data + if sys.version_info < (2, 6): print("Sorry, this module only works on 2.6+, 3+") @@ -11,12 +21,13 @@ setup(name='sas7bdat', - version='2.0.7', + version='2.2.1', author='Jared Hobbs', author_email='ja...@pyhacker.com', license='MIT', url='https://bitbucket.org/jaredhobbs/sas7bdat', description='A sas7bdat file reader for Python', + long_description=convert_file('README.md', 'rst'), py_modules=['sas7bdat'], scripts=['scripts/sas7bdat_to_csv'], install_requires=['six>=1.8.0'], @@ -28,6 +39,7 @@ 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Topic :: Text Processing', 'Topic :: Utilities',