I am trying to convert a bunch of HTML files to markmin, but I am getting
strange results.
I think the HTML-to-markmin conversion doesn't work well with Unicode (UTF-8 in my case).
An example is attached,
and this is the controller I use to do the conversion:
def mmtest():
    """Dump every article as raw HTML and as its markmin conversion.

    For each row of ``db.articles`` the ``content`` field is written to
    ``d:/mmtest/<id>.html`` and its markmin rendering (produced by
    ``TAG(html).flatten(markmin_serializer)``) to ``d:/mmtest/<id>.mm``.

    ``db`` and ``TAG`` are not defined in this snippet; presumably they are
    supplied by the web2py controller environment -- confirm against the app.

    Returns:
        The literal string ``"ok"`` once all rows have been processed.
    """
    from gluon.html import markmin_serializer

    for row in db(db.articles).select():
        html = row.content
        markmin = TAG(html).flatten(markmin_serializer)

        # Shared path prefix for the two output files of this row.
        base = 'd:/mmtest/' + str(row.id)

        # Use context managers so each handle is flushed and closed
        # deterministically, even if a write raises part-way through.
        with open(base + '.html', 'w') as html_file:
            html_file.write(html)
        with open(base + '.mm', 'w') as markmin_file:
            markmin_file.write(markmin)

    return "ok"
Also, at some point I get this error:
Traceback (most recent call last):
File "D:\web2py\gluon\restricted.py", line 194, in restricted
exec ccode in environment
File "D:/web2py/applications/fermer/controllers/tests.py", line 79, in <module>
File "D:\web2py\gluon\globals.py", line 149, in <lambda>
self._caller = lambda f: f()
File "D:/web2py/applications/fermer/controllers/tests.py", line 6, in
mmtest
markmin = TAG(html).flatten(markmin_serializer)
File "D:\web2py\gluon\html.py", line 1054, in __call__
return web2pyHTMLParser(decoder.decoder(html)).tree
File "D:\web2py\gluon\html.py", line 2172, in __init__
self.feed(text)
File "C:\Python27\lib\HTMLParser.py", line 108, in feed
self.goahead(0)
File "C:\Python27\lib\HTMLParser.py", line 148, in goahead
k = self.parse_starttag(i)
File "C:\Python27\lib\HTMLParser.py", line 229, in parse_starttag
endpos = self.check_for_whole_start_tag(i)
File "C:\Python27\lib\HTMLParser.py", line 304, in
check_for_whole_start_tag
self.error("malformed start tag")
File "C:\Python27\lib\HTMLParser.py", line 115, in error
raise HTMLParseError(message, self.getpos())
HTMLParseError: malformed start tag, at line 1, column 4219
Взято из Книга рекордов ГиннесаАвгуст – пора поедания сочных арбузов. Самый большой арбуз в мире вырастила семья из Арканзас, США. Летом 2005 года они вырастили невероятный арбуз весом целых 122 кг. Это средний вес двух взрослых людей. В 2006 году этот рекорд был занесен в Книгу рекордов Гиннеса.Источник:http://skuky.net
1.mm
Description: Binary data