Hi,

###in.txt
<kbd class="command"> cp -v --remove-destination /usr/share/zoneinfo/ <em class="replaceable"><code><xxx></code></em> \ /etc/localtime </kbd>
import sys
import unicodedata
from bs4 import BeautifulSoup

file_name="in.txt"
html_doc=open(file_name,'r')
soup=BeautifulSoup(html_doc)
#print soup.prettify().encode('utf-8')
#file_to_write.writelines( soup.prettify().encode() )
all_kbd=soup.find_all('kbd')
for line in all_kbd:
    if line.string == None:
        extract_code=line.code.extract().string
        #store_code=line.code.decompose()
        for inside_line in line:
            if "<<" not in inside_line and "EOF" not in inside_line:
                if len(inside_line)>0:
                    print inside_line
                    print extract_code

expected output:
cp -v --remove-destination /usr/share/zoneinfo/<xxx>\ /etc/localtime

Got output:
cp -v --remove-destination /usr/share/zoneinfo/
None
\ /etc/localtime
None

shibly
-- https://mail.python.org/mailman/listinfo/python-list