Peng Yu wrote:
For example, if the long string is 'abcabc' and the given string is
'abc', then 'abc' appears 2 times in 'abcabc'. Currently, I am calling
'find()' multiple times to figure out how many times a given string
appears in a long string. I'm wondering if there is a function in
Python which can directly return this information.

re.findall?

>>> patt = re.compile('abc')
>>> len(patt.findall('abcabc'))
2

To count several substrings at once (non-overlapping matches only; tested
only as far as the doctests below show):

8<----------------------------------------------------------------------

import re
from collections import defaultdict

def count(text, *args):
    """Count non-overlapping occurrences of several substrings in *text*.

    The text is scanned once from left to right.  At each position the
    needles are tried in reverse-sorted order, so a needle is always tried
    before any of its own prefixes ('abc' before 'ab').  Text consumed by
    one match is not available to any other needle, which is why 'cab'
    and 'ab' are counted zero times in the examples below.

    Args:
        text: The string to search.
        *args: The substrings to count.  They are matched literally;
            regex metacharacters are escaped.

    Returns:
        A ``defaultdict(int)`` mapping each matched substring to its
        count.  Needles that never matched read as 0.  When no needles
        are given the result is empty.

    >>> ret = count('abcabc', 'abc')
    >>> ret['abc']
    2
    >>> ret = count('xabcxabcx', 'abc', 'x')
    >>> ret['abc']
    2
    >>> ret['x']
    3
    >>> ret = count('abcabc', 'abc', 'cab')
    >>> ret['abc']
    2
    >>> ret['cab']
    0
    >>> ret = count('abcabc', 'abc', 'ab')
    >>> ret['abc']
    2
    >>> ret['ab']
    0
    >>> dict(count('abcabc'))
    {}
    """
    result = defaultdict(int)
    if not args:
        # An empty alternation would compile to the empty pattern, which
        # matches at every position and reports a meaningless '' count.
        return result
    # sorted() works on any iterable; map() returns an iterator on
    # Python 3, so calling .sort()/.reverse() on it would fail.
    alternatives = sorted(map(re.escape, args), reverse=True)
    pattern = re.compile('|'.join(alternatives))
    # finditer yields the same non-overlapping matches as sub() without
    # building a throwaway copy of the text.
    for match in pattern.finditer(text):
        result[match.group(0)] += 1
    return result


# Run the doctests embedded in this module when executed as a script.
if __name__ == '__main__':
    from doctest import testmod
    testmod()
8<----------------------------------------------------------------------

--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to