Yuvipanda has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/257827

Change subject: Sanitize HTML output from nbconvert
......................................................................

Sanitize HTML output from nbconvert

- Whitelist-based filtering, using the 'bleach' Python package
- Whitelist should probably move to config and be overridable
  by site administrators

Change-Id: I3c73cd63c84175d0fe99a3cb71a93632418110a1
---
M convertor.py
A safe_html_template.tpl
2 files changed, 74 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/NotebookViewer refs/changes/27/257827/1

diff --git a/convertor.py b/convertor.py
index b15be4e..a225e3d 100755
--- a/convertor.py
+++ b/convertor.py
@@ -5,11 +5,73 @@
 reload(sys)
 sys.setdefaultencoding("utf-8")
 
+import os
+import jinja2
 from nbconvert.exporters import HTMLExporter
 from traitlets.config import Config
+import bleach
+
+ALLOWED_TAGS = [
+    'a',
+    'abbr',
+    'acronym',
+    'b',
+    'blockquote',
+    'code',
+    'pre',
+    'em',
+    'i',
+    'li',
+    'ol',
+    'strong',
+    'ul',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'img',
+    'div',
+    'span',
+    'table',
+    'tr',
+    'td',
+    'th',
+    'thead',
+    'tbody',
+    'p'
+]
+
+
+ALLOWED_ATTRIBUTES = {
+    'a': ['href', 'rel', 'title'],
+    '*': ['class', 'style'],
+    'table': ['border']
+    }
+
+ALLOWED_STYLES = [
+    'text-align',
+    'color',
+    'background-color',
+]
+
+
+def whitelist_tags(source):
+    return bleach.clean(
+        source, tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        styles=ALLOWED_STYLES)
+
+here = os.path.dirname(os.path.realpath(__file__))
+filesystem_loader = jinja2.FileSystemLoader(here)
 
 config = Config({
-    "HTMLExporter": {"template_file": "basic"},
+    "HTMLExporter": {
+        "template_file": "safe_html_template",
+        "extra_loaders": [filesystem_loader],
+        "filters": {'whitelist_tags': whitelist_tags},
+    },
     'NbConvertBase': {
         'display_data_priority': [
             'text/html',
@@ -24,6 +86,8 @@
     }
 })
 ex = HTMLExporter(config=config)
+# HACK: Somehow the exporter doesn't seem to pick this up from the config?!
+ex.extra_loaders = [filesystem_loader]
 
 html, extra = ex.from_file(sys.stdin)
 sys.stdout.write(html)
diff --git a/safe_html_template.tpl b/safe_html_template.tpl
new file mode 100644
index 0000000..518ab6c
--- /dev/null
+++ b/safe_html_template.tpl
@@ -0,0 +1,9 @@
+{%- extends 'basic.tpl' -%}
+
+{%- block markdowncell -%}
+    {{super() | whitelist_tags }}
+{%- endblock markdowncell -%}
+
+{%- block output -%}
+    {{super() | whitelist_tags }}
+{%- endblock output -%}

-- 
To view, visit https://gerrit.wikimedia.org/r/257827
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3c73cd63c84175d0fe99a3cb71a93632418110a1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/NotebookViewer
Gerrit-Branch: master
Gerrit-Owner: Yuvipanda <yuvipa...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to