include/tools/inetmime.hxx | 21 +++++++ tools/qa/cppunit/test_inetmime.cxx | 103 ++++++++++++++++++++++++++++++++++--- tools/source/inet/inetmime.cxx | 21 ++++--- 3 files changed, 128 insertions(+), 17 deletions(-)
New commits: commit bef9fe6e3decc92bdcec6415b1898e4a0202cc6a Author: Jochen Nitschke <j.nitschke+loger...@ok.de> Date: Tue Apr 18 15:23:56 2017 +0200 extend unit test for INetMIME::scanContentType This reverts parts of commit 631b67952909a73ba1851417bd2edbe02ad7be5a and commit abc6071b7a8af354a56c91e4caecd8afc79f55cc. some of the removed fields are useful, m_bConverted should be checked by callers fixed 2 bugs and added test cases: * extended attributes with more than 2 sections were not parsed * extended attributes with more than 1 section were not parsed if there was another attribute Change-Id: I61ab2af7c5151ef1bcd80cc159fa2b99559374a8 Reviewed-on: https://gerrit.libreoffice.org/36913 Tested-by: Jenkins <c...@libreoffice.org> Reviewed-by: Stephan Bergmann <sberg...@redhat.com> diff --git a/include/tools/inetmime.hxx b/include/tools/inetmime.hxx index e2c58b5ea056..cd66173a8d6c 100644 --- a/include/tools/inetmime.hxx +++ b/include/tools/inetmime.hxx @@ -30,6 +30,16 @@ struct INetContentTypeParameter { + /** The optional character set specification (see RFC 2231), in US-ASCII + encoding and converted to lower case. + */ + OString m_sCharset; + + /** The optional language specification (see RFC 2231), in US-ASCII + encoding and converted to lower case. + */ + OString m_sLanguage; + /** The attribute value. If the value is a quoted-string, it is 'unpacked.' If a character set is specified, and the value can be converted to Unicode, this is done. Also, if no character set is @@ -49,9 +59,18 @@ struct INetContentTypeParameter */ OUString m_sValue; + /** This is true if the value is successfully converted to Unicode, and + false if the value is a special mixture of ISO-LATIN-1 characters and + characters from Unicode's Private Use Area. + */ + bool m_bConverted; }; -// the key is the m_sAttribute again; all keys are lower case: +/** The key is the name of the attribute, in US-ASCII encoding and converted + to lower case. 
If a parameter value is split as described in RFC 2231, + there will only be one item for the complete parameter, with the attribute + name lacking any section suffix. + */ typedef std::unordered_map<OString, INetContentTypeParameter, OStringHash> INetContentTypeParameterList; diff --git a/tools/qa/cppunit/test_inetmime.cxx b/tools/qa/cppunit/test_inetmime.cxx index 1a5d16b5aba6..8b953526778c 100644 --- a/tools/qa/cppunit/test_inetmime.cxx +++ b/tools/qa/cppunit/test_inetmime.cxx @@ -35,11 +35,13 @@ namespace public: void test_decodeHeaderFieldBody(); - void test_scanContentType(); + void test_scanContentType_basic(); + void test_scanContentType_rfc2231(); CPPUNIT_TEST_SUITE(Test); CPPUNIT_TEST(test_decodeHeaderFieldBody); - CPPUNIT_TEST(test_scanContentType); + CPPUNIT_TEST(test_scanContentType_basic); + CPPUNIT_TEST(test_scanContentType_rfc2231); CPPUNIT_TEST_SUITE_END(); }; @@ -56,17 +58,61 @@ namespace CPPUNIT_ASSERT(testDecode("=?iso-8859-1?B?QUJD?=", "ABC")); } - void Test::test_scanContentType() + void Test::test_scanContentType_basic() { { OUString input - = "TEST/subTST; parm1*0*=US-ASCII'En'5%25%20; Parm1*1*=of%2010"; + = "TEST/subTST; parm1=Value1; Parm2=\"unpacked value; %20\""; + // Just scan input for valid string: + auto end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength()); + CPPUNIT_ASSERT(end != nullptr); + CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end)); + // Scan input and parse type, subType and parameters: + OUString type; + OUString subType; + INetContentTypeParameterList parameters; + end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(), + &type, &subType, &parameters); + CPPUNIT_ASSERT(end != nullptr); + CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end)); + CPPUNIT_ASSERT_EQUAL(OUString("test"), type); + CPPUNIT_ASSERT_EQUAL(OUString("subtst"), subType); + CPPUNIT_ASSERT_EQUAL( + INetContentTypeParameterList::size_type(2), parameters.size()); + auto i = parameters.find("parm1"); 
CPPUNIT_ASSERT(i != parameters.end()); + CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sCharset); + CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sLanguage); + CPPUNIT_ASSERT_EQUAL(OUString("Value1"), i->second.m_sValue); + CPPUNIT_ASSERT(i->second.m_bConverted); + i = parameters.find("parm2"); + CPPUNIT_ASSERT(i != parameters.end()); + CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sCharset); + CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sLanguage); + CPPUNIT_ASSERT_EQUAL(OUString("unpacked value; %20"), i->second.m_sValue); + CPPUNIT_ASSERT(i->second.m_bConverted); + } + } + + void Test::test_scanContentType_rfc2231() + { + { + // Test extended parameter with value split in 3 sections: + OUString input + = "TEST/subTST; " + "parm1*0*=US-ASCII'En'5%25%20; " + "Parm1*1*=of%2010;\t" + "parm1*2*=%20%3d%200.5"; + // Just scan input for valid string: + auto end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength()); + CPPUNIT_ASSERT(end != nullptr); + CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end)); + // Scan input and parse type, subType and parameters: OUString type; OUString subType; INetContentTypeParameterList parameters; - auto end = INetMIME::scanContentType( - input.getStr(), input.getStr() + input.getLength(), &type, - &subType, &parameters); + end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(), + &type, &subType, &parameters); CPPUNIT_ASSERT(end != nullptr); CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end)); CPPUNIT_ASSERT_EQUAL(OUString("test"), type); @@ -75,7 +121,48 @@ namespace INetContentTypeParameterList::size_type(1), parameters.size()); auto i = parameters.find("parm1"); CPPUNIT_ASSERT(i != parameters.end()); - CPPUNIT_ASSERT_EQUAL(OUString("5% of 10"), i->second.m_sValue); + CPPUNIT_ASSERT_EQUAL(OString("us-ascii"), i->second.m_sCharset); + CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage); + CPPUNIT_ASSERT_EQUAL(OUString("5% of 10 = 0.5"), i->second.m_sValue); + CPPUNIT_ASSERT(i->second.m_bConverted); 
+ + // Test extended parameters with different value charsets: + input = "TEST/subTST;" + "parm1*0*=us-ascii'en'value;PARM1*1*=1;" + "parm2*0*=WINDOWS-1250'en-GB'value2%20%80;" + "parm3*0*=UNKNOWN'EN'value3"; + // Just scan input for valid string: + end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength()); + CPPUNIT_ASSERT(end != nullptr); + CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end)); + // Scan input and parse type, subType and parameters: + end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(), + &type, &subType, &parameters); + CPPUNIT_ASSERT(end != nullptr); + CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end)); + CPPUNIT_ASSERT_EQUAL(OUString("test"), type); + CPPUNIT_ASSERT_EQUAL(OUString("subtst"), subType); + CPPUNIT_ASSERT_EQUAL( + INetContentTypeParameterList::size_type(3), parameters.size()); + i = parameters.find("parm1"); + CPPUNIT_ASSERT(i != parameters.end()); + CPPUNIT_ASSERT_EQUAL(OString("us-ascii"), i->second.m_sCharset); + CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage); + CPPUNIT_ASSERT_EQUAL(OUString("value1"), i->second.m_sValue); + CPPUNIT_ASSERT(i->second.m_bConverted); + i = parameters.find("parm2"); + CPPUNIT_ASSERT(i != parameters.end()); + CPPUNIT_ASSERT_EQUAL(OString("windows-1250"), i->second.m_sCharset); + CPPUNIT_ASSERT_EQUAL(OString("en-gb"), i->second.m_sLanguage); + // Euro currency sign, windows-1250 x80 is converted to unicode u20AC: + CPPUNIT_ASSERT_EQUAL(OUString(u"value2 \u20AC"), i->second.m_sValue); + CPPUNIT_ASSERT(i->second.m_bConverted); + i = parameters.find("parm3"); + CPPUNIT_ASSERT(i != parameters.end()); + CPPUNIT_ASSERT_EQUAL(OString("unknown"), i->second.m_sCharset); + CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage); + // Convertion fails for unknown charsets: + CPPUNIT_ASSERT(!i->second.m_bConverted); } } diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx index 35afc28b73cb..e95ebd56ef76 100644 --- 
a/tools/source/inet/inetmime.cxx +++ b/tools/source/inet/inetmime.cxx @@ -369,12 +369,14 @@ struct Parameter Parameter * m_pNext; OString m_aAttribute; OString m_aCharset; + OString m_aLanguage; OString m_aValue; sal_uInt32 m_nSection; bool m_bExtended; inline Parameter(Parameter * pTheNext, const OString& rTheAttribute, const OString& rTheCharset, + const OString& rTheLanguage, const OString& rTheValue, sal_uInt32 nTheSection, bool bTheExtended); }; @@ -382,11 +384,13 @@ struct Parameter inline Parameter::Parameter(Parameter * pTheNext, const OString& rTheAttribute, const OString& rTheCharset, + const OString& rTheLanguage, const OString& rTheValue, sal_uInt32 nTheSection, bool bTheExtended): m_pNext(pTheNext), m_aAttribute(rTheAttribute), m_aCharset(rTheCharset), + m_aLanguage(rTheLanguage), m_aValue(rTheValue), m_nSection(nTheSection), m_bExtended(bTheExtended) @@ -439,16 +443,16 @@ Parameter ** ParameterList::find(const OString& rAttribute, for (; *p; p = &(*p)->m_pNext) { sal_Int32 nCompare = rAttribute.compareTo((*p)->m_aAttribute); - if (nCompare > 0) - return &(*p)->m_pNext; + if (nCompare < 0) + break; else if (nCompare == 0) { - if (nSection > (*p)->m_nSection) - return &(*p)->m_pNext; + if (nSection < (*p)->m_nSection) + break; else if (nSection == (*p)->m_nSection) { rPresent = true; - return p; + break; } } } @@ -537,8 +541,9 @@ bool parseParameters(ParameterList const & rInput, break; }; } - INetContentTypeParameter x {aValue}; // workaround ICE in VisualStudio2013 - auto const ret = pOutput->insert({p->m_aAttribute, x }); + auto const ret = pOutput->insert( + {p->m_aAttribute, + {p->m_aCharset, p->m_aLanguage, aValue, !bBadEncoding}}); SAL_INFO_IF(!ret.second, "tools", "INetMIME: dropping duplicate parameter: " << p->m_aAttribute); p = pNext; @@ -877,7 +882,7 @@ sal_Unicode const * scanParameters(sal_Unicode const * pBegin, RTL_TEXTENCODING_UTF8); } - *pPos = new Parameter(*pPos, aAttribute, aCharset, aValue, + *pPos = new Parameter(*pPos, 
aAttribute, aCharset, aLanguage, aValue, nSection, bExtended); } return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits