Author: Brian Kearns <bdkea...@gmail.com> Branch: Changeset: r62605:b4901c26d853 Date: 2013-03-21 03:24 -0400 http://bitbucket.org/pypy/pypy/changeset/b4901c26d853/
Log: simplify/optimize RStringIO by changing it to use StringBuilder diff --git a/rpython/rlib/rStringIO.py b/rpython/rlib/rStringIO.py --- a/rpython/rlib/rStringIO.py +++ b/rpython/rlib/rStringIO.py @@ -1,6 +1,4 @@ - -PIECES = 80 -BIGPIECES = 32 +from rpython.rlib.rstring import StringBuilder AT_END = -1 @@ -8,8 +6,7 @@ class RStringIO(object): """RPython-level StringIO object. The fastest path through this code is for the case of a bunch of write() - followed by getvalue(). For at most PIECES write()s and one getvalue(), - there is one copy of the data done, as if ''.join() was used. + followed by getvalue(). """ _mixin_ = True # for interp_stringio.py @@ -18,20 +15,12 @@ # * the list of characters self.bigbuffer; # * each of the strings in self.strings. # - # Invariants: - # * self.numbigstrings <= self.numstrings; - # * all strings in self.strings[self.numstrings:PIECES] are empty. - # - self.strings = [''] * PIECES - self.numstrings = 0 - self.numbigstrings = 0 + self.strings = StringBuilder() self.bigbuffer = [] self.pos = AT_END def close(self): self.strings = None - self.numstrings = 0 - self.numbigstrings = 0 self.bigbuffer = None def is_closed(self): @@ -40,58 +29,21 @@ def getvalue(self): """If self.strings contains more than 1 string, join all the strings together. Return the final single string.""" - if len(self.bigbuffer) > 0: + if len(self.bigbuffer): self.copy_into_bigbuffer() return ''.join(self.bigbuffer) - if self.numstrings > 1: - result = self.strings[0] = ''.join(self.strings) - for i in range(1, self.numstrings): - self.strings[i] = '' - self.numstrings = 1 - self.numbigstrings = 1 - else: - result = self.strings[0] - return result + return self.strings.build() def getsize(self): result = len(self.bigbuffer) - for i in range(0, self.numstrings): - result += len(self.strings[i]) + result += self.strings.getlength() return result def copy_into_bigbuffer(self): """Copy all the data into the list of characters self.bigbuffer.""" - for i in range(0, self.numstrings): - self.bigbuffer += self.strings[i] - self.strings[i] = '' - self.numstrings = 0 - self.numbigstrings = 0 - return self.bigbuffer - - def reduce(self): - """Reduce the number of (non-empty) strings in self.strings.""" - # When self.pos == AT_END, the calls to write(str) accumulate - # the strings in self.strings until all PIECES slots are filled. - # Then the reduce() method joins all the strings and put the - # result back into self.strings[0]. The next time all the slots - # are filled, we only join self.strings[1:] and put the result - # in self.strings[1]; and so on. The purpose of this is that - # the string resulting from a join is expected to be big, so the - # next join operation should only join the newly added strings. - # When we have done this BIGPIECES times, the next join collects - # all strings again into self.strings[0] and we start from - # scratch. - limit = self.numbigstrings - self.strings[limit] = ''.join(self.strings[limit:]) - for i in range(limit + 1, self.numstrings): - self.strings[i] = '' - self.numstrings = limit + 1 - if limit < BIGPIECES: - self.numbigstrings = limit + 1 - else: - self.numbigstrings = 0 - assert self.numstrings <= BIGPIECES + 1 - return self.numstrings + if self.strings.getlength(): + self.bigbuffer += self.strings.build() + self.strings = StringBuilder() def write(self, buffer): # Idea: for the common case of a sequence of write() followed @@ -110,30 +62,25 @@ else: # slow path: collect all data into self.bigbuffer and # handle the various cases - bigbuffer = self.copy_into_bigbuffer() - fitting = len(bigbuffer) - p + self.copy_into_bigbuffer() + fitting = len(self.bigbuffer) - p if fitting > 0: # the write starts before the end of the data fitting = min(len(buffer), fitting) for i in range(fitting): - bigbuffer[p+i] = buffer[i] + self.bigbuffer[p+i] = buffer[i] if len(buffer) > fitting: # the write extends beyond the end of the data - bigbuffer += buffer[fitting:] + self.bigbuffer += buffer[fitting:] endp = AT_END self.pos = endp return else: # the write starts at or beyond the end of the data - bigbuffer += '\x00' * (-fitting) + self.bigbuffer += '\x00' * (-fitting) self.pos = AT_END # fall-through to the fast path # Fast path. - # See comments in reduce(). - count = self.numstrings - if count == PIECES: - count = self.reduce() - self.strings[count] = buffer - self.numstrings = count + 1 + self.strings.append(buffer) def seek(self, position, mode=0): if mode == 1: @@ -165,8 +112,8 @@ if p == AT_END: return '' assert p >= 0 - bigbuffer = self.copy_into_bigbuffer() - mysize = len(bigbuffer) + self.copy_into_bigbuffer() + mysize = len(self.bigbuffer) count = mysize - p if n >= 0: count = min(n, count) @@ -174,10 +121,10 @@ return '' if p == 0 and count == mysize: self.pos = AT_END - return ''.join(bigbuffer) + return ''.join(self.bigbuffer) else: self.pos = p + count - return ''.join(bigbuffer[p:p+count]) + return ''.join(self.bigbuffer[p:p+count]) def truncate(self, size): # NB. 'size' is mandatory. This has the same un-Posix-y semantics @@ -188,10 +135,8 @@ self.copy_into_bigbuffer() else: # we can drop all extra strings - for i in range(0, self.numstrings): - self.strings[i] = '' - self.numstrings = 0 - self.numbigstrings = 0 + if self.strings.getlength(): + self.strings = StringBuilder() if size < len(self.bigbuffer): del self.bigbuffer[size:] self.pos = AT_END _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit