[web2py:38135] Highlight search word in text with HTML tags and entities

stelg Thu, 31 Dec 2009 09:41:12 -0800

Enclosed is an example that I use to highlight words in a text string
that might also contain HTML tags and entities.


Maybe you can use it in your Web2py applications?

 # -*- coding: utf-8 -*-
import re
from BeautifulSoup import BeautifulStoneSoup
# see http://effbot.org/zone/python-list.htm
# http://effbot.org/librarybook/htmlentitydefs.htm
def remove_tags(in_text):
    """Mask every tag in *in_text* with a filler character.

    Every character from a ``<`` up to and including the next ``>`` is
    replaced by one filler character, so the result has the same length as
    the input and the positions of all characters outside tags are unchanged.

    The filler is the first character of a fixed candidate list that does
    not occur in *in_text*; if every candidate occurs, a space is used.

    An unterminated tag (a ``<`` with no later ``>``) is masked through the
    end of the text instead of raising ``IndexError``.

    Returns the masked text.
    """
    candidates = ('^', '~', '|', '`', '%', '#', '@', '!', '+')
    # Pick a filler that is absent from the text so that masked tag
    # positions cannot be confused with real content.
    filler = ' '
    for candidate in candidates:
        if candidate not in in_text:
            filler = candidate
            break

    masked = []
    in_tag = False
    for char in in_text:
        if in_tag:
            masked.append(filler)
            # The closing '>' is masked too, and ends the tag.
            if char == '>':
                in_tag = False
        elif char == '<':
            masked.append(filler)
            in_tag = True
        else:
            masked.append(char)
    return ''.join(masked)

def insert_highlight_tags(text, searchstring,
        start_tag_highlight='<span style="background-color:%s" id="found-%s">',
        end_tag_highlight='</span>', color='yellow', case_sensitive=False):
    """Wrap every occurrence of *searchstring* in *text* with highlight tags.

    *text* is first passed through ``BeautifulStoneSoup`` with
    ``convertEntities=HTML_ENTITIES`` and turned into a string (presumably
    this replaces HTML entities by their characters -- confirm against the
    BeautifulSoup version in use).  Matches are located in a copy of that
    string whose tags have been masked by ``remove_tags``, so text inside
    tags is not matched unless it consists of the filler character.

    Parameters:
        text: the markup to search.
        searchstring: literal text to highlight (not a regular expression).
        start_tag_highlight: opening tag template; it is formatted with the
            two values ``(color, counter)``, where the counter numbers the
            matches starting at 1.
        end_tag_highlight: closing tag inserted after each match.
        color: value substituted into the opening tag template.
        case_sensitive: when false, matching ignores case.

    Returns the converted text with the highlight tags inserted.  An empty
    *searchstring* yields the converted text without any highlight tags.
    """
    converted_text = str(BeautifulStoneSoup(
        text, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    if not searchstring:
        # An empty pattern would match at every position.
        return converted_text

    # Same length as converted_text, so match offsets apply to both.
    clean_text = remove_tags(converted_text)

    flags = 0
    if not case_sensitive:
        flags = re.IGNORECASE
    pattern = re.compile(re.escape(searchstring), flags)

    pieces = []
    last_end = 0
    tagcounter = 0
    for match in pattern.finditer(clean_text):
        tagcounter += 1
        start, end = match.span()
        pieces.append(converted_text[last_end:start])
        pieces.append(start_tag_highlight % (color, tagcounter))
        pieces.append(converted_text[start:end])
        pieces.append(end_tag_highlight)
        last_end = end
    pieces.append(converted_text[last_end:])
    return ''.join(pieces)

if __name__ == '__main__':
    # Demo: highlight a search string in a deliberately messy HTML snippet
    # (entities, an unclosed <h1>, a malformed closing </p>).
    text = u'''<html><body><h1>This ~is my ^&rsquo; text to
test;<h1><a href="http://localhost";>& O yeh, This is the hyperlink to
my &raquo;local machine&raquo;.</a><p>This is my paragraph< /p></
body></html>'''
    searchstring='~' #search string
    # NOTE: these two custom tags are not passed to the call below.  The
    # start tag has a single %s, while insert_highlight_tags formats its
    # start tag with two values (color, counter), so it cannot be passed
    # as-is.
    start_tag_highlight='<a style="background-color:red" id="found-%s">' # start highlight-tag
    end_tag_highlight='</a>' # end highlight-tag

    # Single-argument call form prints the same under Python 2 and 3.
    print(insert_highlight_tags(text, searchstring, color='green',
                                case_sensitive=True))



--

You received this message because you are subscribed to the Google Groups 
"web2py-users" group.
To post to this group, send email to web...@googlegroups.com.
To unsubscribe from this group, send email to 
web2py+unsubscr...@googlegroups.com.
For more options, visit this group at 
http://groups.google.com/group/web2py?hl=en.

[web2py:38135] Highlight search word in text with HTML tags and entities

Reply via email to