Package: libxapian30 Version: 1.4.5-1 Severity: important Tags: security patch upstream
I spotted an HTML escaping bug in Xapian::MSet::snippet() while working on the code. This issue has been assigned CVE-2018-0499 by the security team. This bug is fixed by yesterday's upstream release 1.4.6 which I'm intending to upload to unstable very shortly. The attached patch should be suitable for fixing this in older 1.4.x releases (1.2.x isn't affected). Cheers, Olly
diff --git a/xapian-core/queryparser/termgenerator_internal.cc b/xapian-core/queryparser/termgenerator_internal.cc index 7fa807db6064..fece98554ebb 100644 --- a/xapian-core/queryparser/termgenerator_internal.cc +++ b/xapian-core/queryparser/termgenerator_internal.cc @@ -432,6 +432,27 @@ SnipPipe::done() } } +inline void +append_escaping_xml(const char* p, const char* end, string& output) +{ + while (p != end) { + char ch = *p++; + switch (ch) { + case '&': + output += "&amp;"; + break; + case '<': + output += "&lt;"; + break; + case '>': + output += "&gt;"; + break; + default: + output += ch; + } + } +} + inline bool SnipPipe::drain(const string & input, const string & hi_start, @@ -465,7 +486,7 @@ SnipPipe::drain(const string & input, if (punc) { // Include end of sentence punctuation. - output.append(input.data() + best_end, i.raw()); + append_escaping_xml(input.data() + best_end, i.raw(), output); } else { // Append "..." or equivalent if this doesn't seem to be the start // of a sentence. @@ -523,8 +544,7 @@ SnipPipe::drain(const string & input, while (i != Utf8Iterator()) { unsigned ch = *i; if (Unicode::is_wordchar(ch)) { - const char * p = input.data() + best_begin; - output.append(p, i.raw() - p); + append_escaping_xml(input.data() + best_begin, i.raw(), output); best_begin = i.raw() - input.data(); break; } @@ -537,22 +557,9 @@ SnipPipe::drain(const string & input, if (phrase_len) output += hi_start; } - while (best_begin != word.term_end) { - char ch = input[best_begin++]; - switch (ch) { - case '&': - output += "&amp;"; - break; - case '<': - output += "&lt;"; - break; - case '>': - output += "&gt;"; - break; - default: - output += ch; - } - } + const char* p = input.data(); + append_escaping_xml(p + best_begin, p + word.term_end, output); + best_begin = word.term_end; if (phrase_len && --phrase_len == 0) output += hi_end; diff --git a/xapian-core/tests/api_snippets.cc b/xapian-core/tests/api_snippets.cc index 4c9296f88d84..70f6afac28bf 100644 --- 
a/xapian-core/tests/api_snippets.cc +++ b/xapian-core/tests/api_snippets.cc @@ -313,3 +313,23 @@ DEFINE_TESTCASE(snippet_empty, backend) { return true; } + +/// Check snippets escape HTML/XML suitably. +DEFINE_TESTCASE(snippet_html_escape, backend) { + Xapian::Enquire enquire(get_database("apitest_simpledata")); + enquire.set_query(Xapian::Query("foo")); + + Xapian::MSet mset = enquire.get_mset(0, 0); + + Xapian::Stem stem; + + const char *input = "#include <foo.h> to use libfoo"; + TEST_STRINGS_EQUAL(mset.snippet(input, 12, stem), + "...&lt;<b>foo</b>.h&gt; to..."); + + input = "&foo takes the address of foo"; + TEST_STRINGS_EQUAL(mset.snippet(input, strlen(input), stem), + "&amp;<b>foo</b> takes the address of <b>foo</b>"); + + return true; +}
signature.asc
Description: PGP signature