Yuvipanda has uploaded a new change for review. https://gerrit.wikimedia.org/r/257827
Change subject: Sanitize HTML output from nbconvert
......................................................................

Sanitize HTML output from nbconvert

- Whitelist based filtering, using 'bleach' python package
- Whitelist should probably move to config and be overrideable by site administrators

Change-Id: I3c73cd63c84175d0fe99a3cb71a93632418110a1
---
M convertor.py
A safe_html_template.tpl
2 files changed, 74 insertions(+), 1 deletion(-)

  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/NotebookViewer refs/changes/27/257827/1

diff --git a/convertor.py b/convertor.py
index b15be4e..a225e3d 100755
--- a/convertor.py
+++ b/convertor.py
@@ -5,11 +5,73 @@
 reload(sys)
 sys.setdefaultencoding("utf-8")
 
+import os
+import jinja2
 from nbconvert.exporters import HTMLExporter
 from traitlets.config import Config
+import bleach
+
+ALLOWED_TAGS = [
+    'a',
+    'abbr',
+    'acronym',
+    'b',
+    'blockquote',
+    'code',
+    'pre',
+    'em',
+    'i',
+    'li',
+    'ol',
+    'strong',
+    'ul',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'img',
+    'div',
+    'span',
+    'table',
+    'tr',
+    'td',
+    'th',
+    'thead',
+    'tbody',
+    'p'
+]
+
+
+ALLOWED_ATTRIBUTES = {
+    'a': ['href', 'rel', 'title'],
+    '*': ['class', 'style'],
+    'table': ['border']
+    }
+
+ALLOWED_STYLES = [
+    'text-align',
+    'color',
+    'background-color',
+]
+
+
+def whitelist_tags(source):
+    return bleach.clean(
+        source, tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        styles=ALLOWED_STYLES)
+
+here = os.path.dirname(os.path.realpath(__file__))
+filesystem_loader = jinja2.FileSystemLoader(here)
 
 config = Config({
-    "HTMLExporter": {"template_file": "basic"},
+    "HTMLExporter": {
+        "template_file": "safe_html_template",
+        "extra_loaders": [filesystem_loader],
+        "filters": {'whitelist_tags': whitelist_tags},
+    },
     'NbConvertBase': {
         'display_data_priority': [
             'text/html',
@@ -24,6 +86,8 @@
     }
 })
 ex = HTMLExporter(config=config)
+# HACK: Somehow the exporter doesn't seem to pick this up from the config?!
+ex.extra_loaders = [filesystem_loader]
 
 html, extra = ex.from_file(sys.stdin)
 sys.stdout.write(html)
diff --git a/safe_html_template.tpl b/safe_html_template.tpl
new file mode 100644
index 0000000..518ab6c
--- /dev/null
+++ b/safe_html_template.tpl
@@ -0,0 +1,9 @@
+{%- extends 'basic.tpl' -%}
+
+{%- block markdowncell -%}
+    {{super() | whitelist_tags }}
+{%- endblock markdowncell -%}
+
+{%- block output -%}
+    {{super() | whitelist_tags }}
+{%- endblock output -%}

-- 
To view, visit https://gerrit.wikimedia.org/r/257827
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3c73cd63c84175d0fe99a3cb71a93632418110a1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/NotebookViewer
Gerrit-Branch: master
Gerrit-Owner: Yuvipanda <yuvipa...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits