sc/source/filter/html/htmlpars.cxx |   96 ++++++++++++++++++++-----------------
 sc/source/filter/inc/htmlpars.hxx  |    2 
 2 files changed, 55 insertions(+), 43 deletions(-)

New commits:
commit a0c23b40905d7b59caf46fc8887864ab35142522
Author:     Kevin Suo <suokunl...@126.com>
AuthorDate: Sat Oct 16 11:07:13 2021 +0800
Commit:     Noel Grandin <noel.gran...@collabora.co.uk>
CommitDate: Tue Oct 19 16:13:45 2021 +0200

    tdf#96499 sc htmlimport: fix broken CSSHandler so that...
    
    ...ScHTMLStyles can handle those MSO-Number-Formats again.
    
    The CSSHandler was added by the following commit, after which the
    sc htmlimport can parse the CSS stylesheets and set number formats
    of table cells accordingly:
    
    commit 24d8e4eaf4543c5b39b9e816d8514525b098827d
    Author: Kohei Yoshida
    Date:   Thu Jul 28 00:46:55 2011 -0400
        Parse CSS in the <style> content and set number formats to cells.
    
    However, this broke because "selector_name" no longer
    functions in the handler since the upgrade to orcus 0.9:
    
    commit edb38d702dd5a058ae0702b73a43328318b94649
    Author: David Tardon
    Date:   Thu Jun 4 16:09:48 2015 +0200
        update to liborcus 0.9.1
    
    Furthermore, selector_name was then removed by the
    following commits due to loplugin:unreffun checks:
    
    commit c76a8580c62299f310e7e43927afc133e2f02350
    Author: Miklos Vajna
    Date:   Fri Jun 5 12:29:36 2015 +0200
        sc: loplugin:unreffun
    
    commit 9eeac5506bd726350b51cb0c36606646f04cbae9
    Author: Miklos Vajna
    Date:   Fri Jun 5 12:32:10 2015 +0200
        sc: another loplugin:unreffun
    
    This was then broken further by the following commit, which
    removed the mrStyles member of type "ScHTMLStyles" from the
    CSSHandler class:
    
    commit 5d86154f49d713dada4aaa541755076cfeefa2c6
    Author: Noel Grandin
    Date:   Tue Sep 18 09:57:26 2018 +0200
        loplugin:unusedfields improve search for unused collection fields
    
    This patch partially reverts those changes and revises the
    CSSHandler member functions to make the handler work again.
    More handler functions still need to be implemented.
    
    Note that this bug is still not resolved, because the orcus
    CSS parser does not accept CSS strings that contain unquoted
    non-ASCII characters as property values. This will be addressed
    in a separate patch.
    
    Change-Id: I8883f5a9362d9476dbfa9a2eed283c6f5c18305d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/123715
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>

diff --git a/sc/source/filter/html/htmlpars.cxx 
b/sc/source/filter/html/htmlpars.cxx
index b6945b7dc639..fc61ea499279 100644
--- a/sc/source/filter/html/htmlpars.cxx
+++ b/sc/source/filter/html/htmlpars.cxx
@@ -3014,7 +3014,7 @@ namespace {
 /**
  * Handler class for the CSS parser.
  */
-class CSSHandler
+class CSSHandler: public orcus::css_handler
 {
     struct MemStr
     {
@@ -3023,20 +3023,53 @@ class CSSHandler
 
         MemStr() : mp(nullptr), mn(0) {}
         MemStr(const char* p, size_t n) : mp(p), mn(n) {}
+        MemStr(const MemStr& r) : mp(r.mp), mn(r.mn) {}
         MemStr& operator=(const MemStr& r) = default;
     };
 
-    MemStr maPropName;  /// current property name.
-    MemStr maPropValue; /// current property value.
+    typedef std::pair<MemStr, MemStr> SelectorName;     // element : class
+    typedef std::vector<SelectorName> SelectorNames;
+
+    SelectorNames maSelectorNames;      // current selector names
+    MemStr maPropName;                  // current property name.
+    MemStr maPropValue;                 // current property value.
+    ScHTMLStyles& mrStyles;
 
 public:
-    explicit CSSHandler() {}
+    explicit CSSHandler(ScHTMLStyles& rStyles):
+        maPropName(),
+        maPropValue(),
+        mrStyles(rStyles)
+     {}
 
+    // selector name starting with "@"
     static void at_rule_name(const char* /*p*/, size_t /*n*/)
     {
         // TODO: For now, we ignore at-rule properties
     }
 
+    // selector name not starting with "." or "#" (i.e. element selectors)
+    void simple_selector_type(const char* pElem, size_t nElem)
+    {
+        MemStr aElem(pElem, nElem); // element given
+        MemStr aClass(nullptr, 0);  // class name not given - to be added in 
the "element global" storage
+        SelectorName aName(aElem, aClass);
+
+        maSelectorNames.push_back(aName);
+    }
+
+    // selector names starting with a "." (i.e. class selector)
+    void simple_selector_class(const char* pClassName, size_t nClassName)
+    {
+        MemStr aElem(nullptr, 0);   // no element given - should be added in 
the "global" storage
+        MemStr aClass(pClassName, nClassName);
+        SelectorName aName(aElem, aClass);
+
+        maSelectorNames.push_back(aName);
+    }
+
+    // TODO: Add other selectors
+
     void property_name(const char* p, size_t n)
     {
         maPropName = MemStr(p, n);
@@ -3047,49 +3080,27 @@ public:
         maPropValue = MemStr(p, n);
     }
 
-    static void begin_parse() {}
-
-    static void end_parse() {}
-
-    static void begin_block() {}
-
-    static void end_block() {}
-
-    static void begin_property() {}
+    void end_block() {
+        maSelectorNames.clear();
+    }
 
     void end_property()
     {
+        SelectorNames::const_iterator itr = maSelectorNames.begin(), itrEnd = 
maSelectorNames.end();
+        for (; itr != itrEnd; ++itr)
+        {
+            // Add this property to the collection for each selector.
+            const SelectorName& rSelName = *itr;
+            const MemStr& rElem = rSelName.first;
+            const MemStr& rClass = rSelName.second;
+            OUString aName(maPropName.mp, maPropName.mn, 
RTL_TEXTENCODING_UTF8);
+            OUString aValue(maPropValue.mp, maPropValue.mn, 
RTL_TEXTENCODING_UTF8);
+            mrStyles.add(rElem.mp, rElem.mn, rClass.mp, rClass.mn, aName, 
aValue);
+        }
         maPropName = MemStr();
         maPropValue = MemStr();
     }
 
-    // new members
-    static void simple_selector_type(const char* /*p*/, size_t /*n*/) {}
-
-    static void simple_selector_class(const char* /*p*/, size_t /*n*/) {}
-
-    static void simple_selector_pseudo_element(orcus::css::pseudo_element_t 
/*pe*/) {}
-
-    static void simple_selector_pseudo_class(orcus::css::pseudo_class_t 
/*pc*/) {}
-
-    static void simple_selector_id(const char* /*p*/, size_t /*n*/) {}
-
-    static void end_simple_selector() {}
-
-    static void end_selector() {}
-
-    static void combinator(orcus::css::combinator_t /*combinator*/) {}
-
-    static void rgb(uint8_t /*red*/ , uint8_t /*green*/ , uint8_t /*blue*/ ) {}
-
-    static void rgba(uint8_t /*red*/ , uint8_t /*green*/ , uint8_t /*blue*/ , 
double /*alpha*/ ) {}
-
-    static void hsl(uint8_t /*hue*/ , uint8_t /*sat*/ , uint8_t /*light*/ ) {}
-
-    static void hsla(uint8_t /*hue*/ , uint8_t /*sat*/ , uint8_t /*light*/ , 
double /*alpha*/ ) {}
-
-    static void url(const char* /*p*/, size_t /*n*/) {}
-
 };
 
 }
@@ -3097,14 +3108,15 @@ public:
 void ScHTMLQueryParser::ParseStyle(std::u16string_view rStrm)
 {
     OString aStr = OUStringToOString(rStrm, RTL_TEXTENCODING_UTF8);
-    CSSHandler aHdl;
+    CSSHandler aHdl(GetStyles());
     orcus::css_parser<CSSHandler> aParser(aStr.getStr(), aStr.getLength(), 
aHdl);
     try
     {
         aParser.parse();
     }
-    catch (const orcus::css::parse_error&)
+    catch (const orcus::css::parse_error& rOrcusParseError)
     {
+        SAL_WARN("sc", "ScHTMLQueryParser::ParseStyle: " << 
rOrcusParseError.what());
         // TODO: Parsing of CSS failed.  Do nothing for now.
     }
 }
diff --git a/sc/source/filter/inc/htmlpars.hxx 
b/sc/source/filter/inc/htmlpars.hxx
index 02af4857616e..11065dec9e88 100644
--- a/sc/source/filter/inc/htmlpars.hxx
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -609,7 +609,7 @@ private:
     /** Closes the current table, regardless on opening tag. */
     void                CloseTable( const HtmlImportInfo& rInfo );
 
-    static void         ParseStyle(std::u16string_view rStrm);
+    void         ParseStyle(std::u16string_view rStrm);
 
     DECL_LINK( HTMLImportHdl, HtmlImportInfo&, void );
 

Reply via email to