formula/inc/core_resource.hrc | 6 +++ include/formula/compiler.hxx | 3 + include/formula/opcode.hxx | 2 + sc/inc/helpids.h | 1 sc/inc/scfuncs.hrc | 12 +++++++ sc/qa/unit/ucalc.cxx | 1 sc/source/core/data/funcdesc.cxx | 3 + sc/source/core/inc/interpre.hxx | 1 sc/source/core/tool/interpr1.cxx | 55 +++++++++++++++++++++++++++++++++++ sc/source/core/tool/interpr4.cxx | 1 sc/source/filter/excel/xlformula.cxx | 3 + sc/source/filter/oox/formulabase.cxx | 3 + 12 files changed, 87 insertions(+), 4 deletions(-)
New commits: commit 8dec85a3b3f4cbd46b03f707458347a25cc22c15 Author: Eike Rathke <er...@redhat.com> AuthorDate: Wed Oct 24 19:42:35 2018 +0200 Commit: Eike Rathke <er...@redhat.com> CommitDate: Thu Oct 25 12:13:58 2018 +0200 Resolves: tdf#113977 implement REGEX() spreadsheet function REGEX( Text ; Expression [ ; Replacement ] ) Using ICU regular expressions http://userguide.icu-project.org/strings/regexp Change-Id: I4cb9b8ba77cfb5b8faab93037aa0d947609383d7 Reviewed-on: https://gerrit.libreoffice.org/62332 Reviewed-by: Eike Rathke <er...@redhat.com> Tested-by: Jenkins diff --git a/formula/inc/core_resource.hrc b/formula/inc/core_resource.hrc index 467a64bdf3f0..4cc928fd9a6a 100644 --- a/formula/inc/core_resource.hrc +++ b/formula/inc/core_resource.hrc @@ -465,6 +465,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_ODFF[] = { "COM.MICROSOFT.ENCODEURL" , SC_OPCODE_ENCODEURL }, { "ORG.LIBREOFFICE.RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT }, { "ORG.LIBREOFFICE.ROUNDSIG" , SC_OPCODE_ROUNDSIG }, + { "ORG.LIBREOFFICE.REGEX" , SC_OPCODE_REGEX }, { nullptr, -1 } }; @@ -907,6 +908,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_OOXML[] = { "_xlfn.ENCODEURL" , SC_OPCODE_ENCODEURL }, { "_xlfn.ORG.LIBREOFFICE.RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT }, { "_xlfn.ORG.LIBREOFFICE.ROUNDSIG" , SC_OPCODE_ROUNDSIG }, + { "_xlfn.ORG.LIBREOFFICE.REGEX" , SC_OPCODE_REGEX }, { nullptr, -1 } }; @@ -1354,6 +1356,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_PODF[] = { "ENCODEURL" , SC_OPCODE_ENCODEURL }, { "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT }, { "ROUNDSIG" , SC_OPCODE_ROUNDSIG }, + { "REGEX" , SC_OPCODE_REGEX }, { nullptr, -1 } }; @@ -1800,6 +1803,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_API[] = { "ENCODEURL" , SC_OPCODE_ENCODEURL }, { "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT }, { "ROUNDSIG" , SC_OPCODE_ROUNDSIG }, + { "REGEX" , SC_OPCODE_REGEX }, { nullptr, -1 } }; @@ -2245,6 +2249,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH[] = { "ENCODEURL" , SC_OPCODE_ENCODEURL }, { "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT }, { "ROUNDSIG" , SC_OPCODE_ROUNDSIG }, + { "REGEX" , SC_OPCODE_REGEX }, { nullptr, -1 } }; @@ -2674,6 +2679,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES[] = { NC_("RID_STRLIST_FUNCTION_NAMES", "ROUNDSIG") , SC_OPCODE_ROUNDSIG }, { NC_("RID_STRLIST_FUNCTION_NAMES", "FINDB") , SC_OPCODE_FINDB }, { NC_("RID_STRLIST_FUNCTION_NAMES", "SEARCHB") , SC_OPCODE_SEARCHB }, + { NC_("RID_STRLIST_FUNCTION_NAMES", "REGEX") , SC_OPCODE_REGEX }, { nullptr, -1 } }; diff --git a/include/formula/compiler.hxx b/include/formula/compiler.hxx index 0d5b1cc8f632..09a507fc3d68 100644 --- a/include/formula/compiler.hxx +++ b/include/formula/compiler.hxx @@ -505,7 +505,8 @@ #define SC_OPCODE_REPLACEB 494 #define SC_OPCODE_FINDB 495 #define SC_OPCODE_SEARCHB 496 -#define SC_OPCODE_STOP_2_PAR 497 /* last function with two or more parameters' OpCode + 1 */ +#define SC_OPCODE_REGEX 497 +#define SC_OPCODE_STOP_2_PAR 498 /* last function with two or more parameters' OpCode + 1 */ #define SC_OPCODE_STOP_FUNCTION SC_OPCODE_STOP_2_PAR /* last function's OpCode + 1 */ #define SC_OPCODE_LAST_OPCODE_ID (SC_OPCODE_STOP_FUNCTION - 1) /* last OpCode */ diff --git a/include/formula/opcode.hxx b/include/formula/opcode.hxx index 437403d919b2..d2c6548e286f 100644 --- a/include/formula/opcode.hxx +++ b/include/formula/opcode.hxx @@ -343,6 +343,7 @@ enum OpCode : sal_uInt16 ocFindB = SC_OPCODE_FINDB, ocSearchB = SC_OPCODE_SEARCHB, ocNumberValue = SC_OPCODE_NUMBERVALUE, + ocRegex = SC_OPCODE_REGEX, // Matrix functions ocMatValue = SC_OPCODE_MAT_VALUE, ocMatDet = SC_OPCODE_MAT_DET, @@ -808,6 +809,7 @@ inline std::string OpCodeEnumToString(OpCode eCode) case ocText: return "Text"; case ocSubstitute: return "Substitute"; case ocRept: return "Rept"; + case ocRegex: return "Regex"; case ocConcat: return "Concat"; case ocConcat_MS: return "Concat_MS"; case ocTextJoin_MS: return "TextJoin_MS"; diff --git a/sc/inc/helpids.h b/sc/inc/helpids.h index b0770e69e8f5..283057e3ccce 100644 --- a/sc/inc/helpids.h +++ b/sc/inc/helpids.h @@ -577,6 +577,7 @@ #define HID_FUNC_REPLACEB "SC_HID_FUNC_REPLACEB" #define HID_FUNC_FINDB "SC_HID_FUNC_FINDB" #define HID_FUNC_SEARCHB "SC_HID_FUNC_SEARCHB" +#define HID_FUNC_REGEX "SC_HID_FUNC_REGEX" #endif diff --git a/sc/inc/scfuncs.hrc b/sc/inc/scfuncs.hrc index c469214af76c..5ed0949ca3b8 100644 --- a/sc/inc/scfuncs.hrc +++ b/sc/inc/scfuncs.hrc @@ -3816,6 +3816,18 @@ const char* SC_OPCODE_SUBSTITUTE_ARY[] = NC_("SC_OPCODE_SUBSTITUTE", "Which occurrence of the old text is to be replaced.") }; +// -=*# Resource for function REGEX #*=- +const char* SC_OPCODE_REGEX_ARY[] = +{ + NC_("SC_OPCODE_REGEX", "Matches and optionally replaces text using regular expressions."), + NC_("SC_OPCODE_REGEX", "Text"), + NC_("SC_OPCODE_REGEX", "The text to be operated on."), + NC_("SC_OPCODE_REGEX", "Expression"), + NC_("SC_OPCODE_REGEX", "The regular expression to be matched."), + NC_("SC_OPCODE_REGEX", "Replacement"), + NC_("SC_OPCODE_REGEX", "The replacement text and expression.") +}; + // -=*# Resource for function BASE #*=- const char* SC_OPCODE_BASE_ARY[] = { diff --git a/sc/qa/unit/ucalc.cxx b/sc/qa/unit/ucalc.cxx index c9d092cb54de..2f1e7391f41f 100644 --- a/sc/qa/unit/ucalc.cxx +++ b/sc/qa/unit/ucalc.cxx @@ -2654,6 +2654,7 @@ void Test::testFunctionLists() "MIDB", "NUMBERVALUE", "PROPER", + "REGEX", "REPLACE", "REPLACEB", "REPT", diff --git a/sc/source/core/data/funcdesc.cxx b/sc/source/core/data/funcdesc.cxx index 6f31ab7cf234..5c530b3c6009 100644 --- a/sc/source/core/data/funcdesc.cxx +++ b/sc/source/core/data/funcdesc.cxx @@ -807,7 +807,8 @@ ScFunctionList::ScFunctionList() { SC_OPCODE_ROUNDSIG, ENTRY(SC_OPCODE_ROUNDSIG_ARY), 0, ID_FUNCTION_GRP_MATH, HID_FUNC_ROUNDSIG, 2, { 0, 0 } }, { SC_OPCODE_REPLACEB, ENTRY(SC_OPCODE_REPLACEB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_REPLACEB, 4, { 0, 0, 0, 0 } }, { SC_OPCODE_FINDB, ENTRY(SC_OPCODE_FINDB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_FINDB, 3, { 0, 0, 1 } }, - { SC_OPCODE_SEARCHB, ENTRY(SC_OPCODE_SEARCHB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_SEARCHB, 3, { 0, 0, 1 } } + { SC_OPCODE_SEARCHB, ENTRY(SC_OPCODE_SEARCHB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_SEARCHB, 3, { 0, 0, 1 } }, + { SC_OPCODE_REGEX, ENTRY(SC_OPCODE_REGEX_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_REGEX, 3, { 0, 0, 1 } } }; ScFuncDesc* pDesc = nullptr; diff --git a/sc/source/core/inc/interpre.hxx b/sc/source/core/inc/interpre.hxx index 1a93baa9c364..c09f91405923 100644 --- a/sc/source/core/inc/interpre.hxx +++ b/sc/source/core/inc/interpre.hxx @@ -665,6 +665,7 @@ private: void ScText(); void ScSubstitute(); void ScRept(); + void ScRegex(); void ScConcat(); void ScConcat_MS(); void ScTextJoin_MS(); diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx index dc23b55a43dc..6708bdce7913 100644 --- a/sc/source/core/tool/interpr1.cxx +++ b/sc/source/core/tool/interpr1.cxx @@ -9222,6 +9222,61 @@ void ScInterpreter::ScSearch() } } +void ScInterpreter::ScRegex() +{ + sal_uInt8 nParamCount = GetByte(); + if (MustHaveParamCount( nParamCount, 2, 3)) + { + bool bReplacement = false; + OUString aReplacement; + if (nParamCount == 3) + { + // A missing argument is not an empty string to replace the match. + if (IsMissing()) + Pop(); + else + { + aReplacement = GetString().getString(); + bReplacement = true; + } + } + + OUString aExpression = GetString().getString(); + OUString aText = GetString().getString(); + + if (nGlobalError != FormulaError::NONE) + { + PushError( nGlobalError); + return; + } + + sal_Int32 nPos = 0; + sal_Int32 nEndPos = aText.getLength(); + utl::SearchParam aParam( aExpression, utl::SearchParam::SearchType::Regexp); + css::util::SearchResult aResult; + utl::TextSearch aSearch( aParam, *ScGlobal::pCharClass); + const bool bMatch = aSearch.SearchForward( aText, &nPos, &nEndPos, &aResult); + if (!bMatch) + PushNoValue(); + else + { + assert(aResult.subRegExpressions >= 1); + if (!bReplacement) + PushString( aText.copy( aResult.startOffset[0], aResult.endOffset[0] - aResult.startOffset[0])); + else + { + /* TODO: global replacement of multiple occurrences, introduce + * extra parameter with flag 'g'? Loop over positions after + * nEndPos until none left? How to keep the offsets in sync + * after replacement? That should be done by + * ReplaceBackReferences(). */ + aSearch.ReplaceBackReferences( aReplacement, aText, aResult); + PushString( aReplacement); + } + } + } +} + void ScInterpreter::ScMid() { if ( MustHaveParamCount( GetByte(), 3 ) ) diff --git a/sc/source/core/tool/interpr4.cxx b/sc/source/core/tool/interpr4.cxx index f5ca631979e6..d044295ee4d3 100644 --- a/sc/source/core/tool/interpr4.cxx +++ b/sc/source/core/tool/interpr4.cxx @@ -4215,6 +4215,7 @@ StackVar ScInterpreter::Interpret() case ocMid : ScMid(); break; case ocText : ScText(); break; case ocSubstitute : ScSubstitute(); break; + case ocRegex : ScRegex(); break; case ocRept : ScRept(); break; case ocConcat : ScConcat(); break; case ocConcat_MS : ScConcat_MS(); break; diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx index 644a806dc8f3..2af552318efc 100644 --- a/sc/source/filter/excel/xlformula.cxx +++ b/sc/source/filter/excel/xlformula.cxx @@ -639,7 +639,8 @@ static const XclFunctionInfo saFuncTable_OOoLO[] = EXC_FUNCENTRY_OOO( ocForecast_ETS_MUL, 3, 6, 0, "ORG.LIBREOFFICE.FORECAST.ETS.MULT" ), EXC_FUNCENTRY_OOO( ocForecast_ETS_PIM, 3, 7, 0, "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT" ), EXC_FUNCENTRY_OOO( ocForecast_ETS_STM, 3, 6, 0, "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT" ), - EXC_FUNCENTRY_OOO( ocRoundSig, 2, 2, 0, "ORG.LIBREOFFICE.ROUNDSIG" ) + EXC_FUNCENTRY_OOO( ocRoundSig, 2, 2, 0, "ORG.LIBREOFFICE.ROUNDSIG" ), + EXC_FUNCENTRY_OOO( ocRegex, 2, 3, 0, "ORG.LIBREOFFICE.REGEX" ) }; #undef EXC_FUNCENTRY_OOO_IBR diff --git a/sc/source/filter/oox/formulabase.cxx b/sc/source/filter/oox/formulabase.cxx index 57bfb35b65eb..be09c75a6251 100644 --- a/sc/source/filter/oox/formulabase.cxx +++ b/sc/source/filter/oox/formulabase.cxx @@ -910,7 +910,8 @@ static const FunctionData saFuncTableOOoLO[] = { "ORG.LIBREOFFICE.FORECAST.ETS.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.MULT", NOID, NOID, 3, 6, V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW }, { "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT", NOID, NOID, 4, 7, V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW }, { "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT", NOID, NOID, 3, 6, V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW }, - { "ORG.LIBREOFFICE.ROUNDSIG", "ORG.LIBREOFFICE.ROUNDSIG", NOID, NOID, 2, 2, V, { RX }, FuncFlags::MACROCALL_NEW } + { "ORG.LIBREOFFICE.ROUNDSIG", "ORG.LIBREOFFICE.ROUNDSIG", NOID, NOID, 2, 2, V, { RX }, FuncFlags::MACROCALL_NEW }, + { "ORG.LIBREOFFICE.REGEX", "ORG.LIBREOFFICE.REGEX", NOID, NOID, 2, 3, V, { RX }, FuncFlags::MACROCALL_NEW } }; _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits