cui/inc/strings.hrc | 1 cui/source/inc/autocdlg.hxx | 2 cui/source/tabpages/autocdlg.cxx | 15 +++ editeng/source/misc/acorrcfg.cxx | 18 +++ editeng/source/misc/svxacorr.cxx | 18 +++ editeng/source/misc/swafopt.cxx | 1 include/editeng/svxacorr.hxx | 6 + include/editeng/swafopt.hxx | 1 include/svl/urihelper.hxx | 5 + officecfg/registry/schema/org/openoffice/Office/Common.xcs | 10 ++ officecfg/registry/schema/org/openoffice/Office/Writer.xcs | 8 + svl/qa/unit/test_URIHelper.cxx | 54 +++++++++++ svl/source/misc/urihelper.cxx | 59 +++++++++++++ sw/inc/comcore.hxx | 3 sw/inc/utlui.hrc | 3 sw/source/core/edit/autofmt.cxx | 9 + sw/source/uibase/docvw/edtwin.cxx | 5 - sw/source/uibase/shells/textsh.cxx | 3 sw/source/uibase/wrtsh/wrtsh1.cxx | 3 19 files changed, 210 insertions(+), 14 deletions(-)
New commits: commit a772976f047882918d5386a3ef9226c4aa2aa118 Author: Baole Fang <baole.f...@gmail.com> AuthorDate: Tue Apr 25 00:33:01 2023 -0400 Commit: Stephan Bergmann <sberg...@redhat.com> CommitDate: Thu Apr 27 15:27:33 2023 +0200 tdf#145925: Add DOI recognition Detect DOI string in the form of "doi:10.*" and add hyperlink to it. It works the same way as url recognition. Change-Id: I3c4e78a110fd81ad7e727d5e9acee7e51127466a Reviewed-on: https://gerrit.libreoffice.org/c/core/+/150954 Tested-by: Jenkins Reviewed-by: Heiko Tietze <heiko.tie...@documentfoundation.org> Reviewed-by: Stephan Bergmann <sberg...@redhat.com> diff --git a/cui/inc/strings.hrc b/cui/inc/strings.hrc index 498e60e75bc2..442ac515ef8c 100644 --- a/cui/inc/strings.hrc +++ b/cui/inc/strings.hrc @@ -332,6 +332,7 @@ #define RID_CUISTR_BOLD_UNDER NC_("RID_SVXSTR_BOLD_UNDER", "Automatic *bold*, /italic/, -strikeout- and _underline_") #define RID_CUISTR_NO_DBL_SPACES NC_("RID_SVXSTR_NO_DBL_SPACES", "Ignore double spaces") #define RID_CUISTR_DETECT_URL NC_("RID_SVXSTR_DETECT_URL", "URL Recognition") +#define RID_CUISTR_DETECT_DOI NC_("RID_SVXSTR_DETECT_DOI", "DOI citation recognition") #define RID_CUISTR_DASH NC_("RID_SVXSTR_DASH", "Replace dashes") #define RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK NC_("RID_SVXSTR_CORRECT_ACCIDENTAL_CAPS_LOCK", "Correct accidental use of cAPS LOCK key") #define RID_CUISTR_NON_BREAK_SPACE NC_("RID_SVXSTR_NON_BREAK_SPACE", "Add non-breaking space before specific punctuation marks in French text") diff --git a/cui/source/inc/autocdlg.hxx b/cui/source/inc/autocdlg.hxx index 7b4842314779..2357677ef65f 100644 --- a/cui/source/inc/autocdlg.hxx +++ b/cui/source/inc/autocdlg.hxx @@ -57,6 +57,7 @@ private: OUString m_sStartCap; OUString m_sBoldUnderline; OUString m_sURL; + OUString m_sDOI; OUString m_sNoDblSpaces; OUString m_sDash; OUString m_sAccidentalCaps; @@ -92,6 +93,7 @@ class OfaSwAutoFmtOptionsPage : public SfxTabPage OUString sNoDblSpaces; OUString sCorrectCapsLock; 
OUString sDetectURL; + OUString sDetectDOI; OUString sDash; OUString sRightMargin; OUString sNum; diff --git a/cui/source/tabpages/autocdlg.cxx b/cui/source/tabpages/autocdlg.cxx index aea3f7d9727e..38a261ad5737 100644 --- a/cui/source/tabpages/autocdlg.cxx +++ b/cui/source/tabpages/autocdlg.cxx @@ -189,6 +189,7 @@ OfaAutocorrOptionsPage::OfaAutocorrOptionsPage(weld::Container* pPage, weld::Dia , m_sStartCap(CuiResId(RID_CUISTR_CPTL_STT_SENT)) , m_sBoldUnderline(CuiResId(RID_CUISTR_BOLD_UNDER)) , m_sURL(CuiResId(RID_CUISTR_DETECT_URL)) + , m_sDOI(CuiResId(RID_CUISTR_DETECT_DOI)) , m_sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES)) , m_sDash(CuiResId(RID_CUISTR_DASH)) , m_sAccidentalCaps(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK)) @@ -223,6 +224,7 @@ bool OfaAutocorrOptionsPage::FillItemSet( SfxItemSet* ) pAutoCorrect->SetAutoCorrFlag(ACFlags::CapitalStartSentence, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgWeightUnderl, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); + pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgToEnEmDash, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); pAutoCorrect->SetAutoCorrFlag(ACFlags::IgnoreDoubleSpace, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); pAutoCorrect->SetAutoCorrFlag(ACFlags::CorrectCapsLock, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE); @@ -263,6 +265,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* ) InsertEntry(m_sStartCap); InsertEntry(m_sBoldUnderline); InsertEntry(m_sURL); + InsertEntry(m_sDOI); InsertEntry(m_sDash); InsertEntry(m_sNoDblSpaces); InsertEntry(m_sAccidentalCaps); @@ -273,6 +276,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* ) m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CapitalStartSentence) ? 
TRISTATE_TRUE : TRISTATE_FALSE ); m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE ); m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetINetAttr) ? TRISTATE_TRUE : TRISTATE_FALSE ); + m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetDOIAttr) ? TRISTATE_TRUE : TRISTATE_FALSE ); m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE ); m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::IgnoreDoubleSpace) ? TRISTATE_TRUE : TRISTATE_FALSE ); m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CorrectCapsLock) ? TRISTATE_TRUE : TRISTATE_FALSE ); @@ -333,6 +337,7 @@ enum OfaAutoFmtOptions BEGIN_UPPER, BOLD_UNDERLINE, DETECT_URL, + DETECT_DOI, REPLACE_DASHES, DEL_SPACES_AT_STT_END, DEL_SPACES_BETWEEN_LINES, @@ -363,6 +368,7 @@ OfaSwAutoFmtOptionsPage::OfaSwAutoFmtOptionsPage(weld::Container* pPage, weld::D , sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES)) , sCorrectCapsLock(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK)) , sDetectURL(CuiResId(RID_CUISTR_DETECT_URL)) + , sDetectDOI(CuiResId(RID_CUISTR_DETECT_DOI)) , sDash(CuiResId(RID_CUISTR_DASH)) , sRightMargin(CuiResId(RID_CUISTR_RIGHT_MARGIN)) , sNum(CuiResId(RID_CUISTR_NUM)) @@ -455,6 +461,12 @@ bool OfaSwAutoFmtOptionsPage::FillItemSet( SfxItemSet* ) pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr, m_xCheckLB->get_toggle(DETECT_URL, CBCOL_SECOND) == TRISTATE_TRUE); + bCheck = m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_FIRST) == TRISTATE_TRUE; + bModified |= pOpt->bSetDOIAttr != bCheck; + pOpt->bSetDOIAttr = bCheck; + pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr, + m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_SECOND) == TRISTATE_TRUE); + bCheck = m_xCheckLB->get_toggle(DEL_EMPTY_NODE, CBCOL_FIRST) == TRISTATE_TRUE; bModified |= pOpt->bDelEmptyNode != bCheck; pOpt->bDelEmptyNode = bCheck; @@ -558,6 +570,7 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* ) 
CreateEntry(sCapitalStartSentence, CBCOL_BOTH ); CreateEntry(sBoldUnder, CBCOL_BOTH ); CreateEntry(sDetectURL, CBCOL_BOTH ); + CreateEntry(sDetectDOI, CBCOL_BOTH ); CreateEntry(sDash, CBCOL_BOTH ); CreateEntry(sDelSpaceAtSttEnd, CBCOL_BOTH ); CreateEntry(sDelSpaceBetweenLines, CBCOL_BOTH ); @@ -583,6 +596,8 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* ) m_xCheckLB->set_toggle(BOLD_UNDERLINE, bool(nFlags & ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND); m_xCheckLB->set_toggle(DETECT_URL, pOpt->bSetINetAttr ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST); m_xCheckLB->set_toggle(DETECT_URL, bool(nFlags & ACFlags::SetINetAttr) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND); + m_xCheckLB->set_toggle(DETECT_DOI, pOpt->bSetDOIAttr ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST); + m_xCheckLB->set_toggle(DETECT_DOI, bool(nFlags & ACFlags::SetDOIAttr) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND); m_xCheckLB->set_toggle(REPLACE_DASHES, pOpt->bChgToEnEmDash ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST); m_xCheckLB->set_toggle(REPLACE_DASHES, bool(nFlags & ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND); m_xCheckLB->set_toggle(DEL_SPACES_AT_STT_END, pOpt->bAFormatDelSpacesAtSttEnd ? 
TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
diff --git a/editeng/source/misc/acorrcfg.cxx b/editeng/source/misc/acorrcfg.cxx
index 787663a96f22..8603b4347da1 100644
--- a/editeng/source/misc/acorrcfg.cxx
+++ b/editeng/source/misc/acorrcfg.cxx
@@ -185,9 +185,10 @@ Sequence<OUString> SvxBaseAutoCorrCfg::GetPropertyNames()
         "DoubleQuoteAtEnd",                     // 16
         "CorrectAccidentalCapsLock",            // 17
         "TransliterateRTL",                     // 18
-        "ChangeAngleQuotes"                     // 19
+        "ChangeAngleQuotes",                    // 19
+        "SetDOIAttribute",                      // 20
     };
-    const int nCount = 20;
+    const int nCount = 21;
     Sequence<OUString> aNames(nCount);
     OUString* pNames = aNames.getArray();
     for(int i = 0; i < nCount; i++)
@@ -298,6 +299,10 @@ void SvxBaseAutoCorrCfg::Load(bool bInit)
                         if(*o3tl::doAccess<bool>(pValues[nProp]))
                             nFlags |= ACFlags::ChgAngleQuotes;
                     break;//"ChangeAngleQuotes"
+                    case 20:
+                        if(*o3tl::doAccess<bool>(pValues[nProp]))
+                            nFlags |= ACFlags::SetDOIAttr;
+                    break;//"SetDOIAttr",
                 }
             }
         }
@@ -333,6 +338,7 @@ void SvxBaseAutoCorrCfg::ImplCommit()
          css::uno::Any(bool(nFlags & ACFlags::ChgWeightUnderl)),
          // "ChangeUnderlineWeight"
          css::uno::Any(bool(nFlags & ACFlags::SetINetAttr)), // "SetInetAttribute"
+         css::uno::Any(bool(nFlags & ACFlags::SetDOIAttr)), // "SetDOIAttr" - NOTE(review): the name "SetDOIAttribute" is appended last (20) in GetPropertyNames(), yet this value is inserted mid-list; confirm the values are not paired with the names by position
          css::uno::Any(bool(nFlags & ACFlags::ChgOrdinalNumber)),
          // "ChangeOrdinalNumber"
          css::uno::Any(bool(nFlags & ACFlags::AddNonBrkSpace)), // "AddNonBreakingSpace"
@@ -414,8 +420,9 @@ Sequence<OUString> SvxSwAutoCorrCfg::GetPropertyNames()
         "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily", //44
         "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset", //45
         "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch", //46
+        "Format/Option/SetDOIAttribute", //47
     };
-    const int nCount = 47;
+    const int nCount = 48;
     Sequence<OUString> aNames(nCount);
     OUString* pNames = aNames.getArray();
     for(int i = 0; i < nCount; i++)
@@ -565,6 +572,7 @@ void SvxSwAutoCorrCfg::Load(bool bInit)
                         rSwFlags.aByInputBulletFont.SetPitch(FontPitch(nVal));
                     }
                     break;// "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch",
+                    case 47: rSwFlags.bSetDOIAttr = *o3tl::doAccess<bool>(pValues[nProp]); break; // "Format/Option/SetDOIAttribute",
                 }
             }
         }
@@ -666,8 +674,10 @@ void SvxSwAutoCorrCfg::ImplCommit()
          // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily"
          css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetCharSet())),
          // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset"
-         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch()))});
+         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch())),
          // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch"
+         css::uno::Any(rSwFlags.bSetDOIAttr)});
+         // "Format/Option/SetDOIAttribute"
 }
 
 void SvxSwAutoCorrCfg::Notify( const Sequence<OUString>& /* aPropertyNames */ )
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index 5e229ba10d24..68743034e66b 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -289,6 +289,7 @@ ACFlags SvxAutoCorrect::GetDefaultFlags()
                     | ACFlags::ChgAngleQuotes
                     | ACFlags::ChgWeightUnderl
                     | ACFlags::SetINetAttr
+                    | ACFlags::SetDOIAttr
                     | ACFlags::ChgQuotes
                     | ACFlags::SaveWordCplSttLst
                     | ACFlags::SaveWordWordStartLst
@@ -752,6 +753,18 @@ bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
     return bRet;
 }
 
+// Turn a DOI citation (e.g. "doi:10.1000/182") into a hyperlink
+bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
+                                sal_Int32 nSttPos, sal_Int32 nEndPos,
+                                LanguageType eLang )
+{
+    const OUString sDoiLink( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
+    if( sDoiLink.isEmpty() )
+        return false; // nothing recognized, no attribute is set
+    rDoc.SetINetAttr( nSttPos, nEndPos, sDoiLink );
+    return true;
+}
+
 // Automatic *bold*, /italic/, -strikeout- and _underline_
 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                         sal_Int32 nEndPos )
@@ -1609,7 +1622,10 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& 
rDoc, const OUString& rTxt, FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || ( IsAutoCorrFlag( ACFlags::SetINetAttr ) && ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && - FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) + FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || + ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) && + ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && + FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) ; else { diff --git a/editeng/source/misc/swafopt.cxx b/editeng/source/misc/swafopt.cxx index 293554589438..f6de4098d2e4 100644 --- a/editeng/source/misc/swafopt.cxx +++ b/editeng/source/misc/swafopt.cxx @@ -36,6 +36,7 @@ SvxSwAutoFormatFlags::SvxSwAutoFormatFlags() bChgToEnEmDash = bChgWeightUnderl = bSetINetAttr = + bSetDOIAttr = bAFormatDelSpacesAtSttEnd = bAFormatDelSpacesBetweenLines = bAFormatByInpDelSpacesAtSttEnd = diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx index 39a3d4c65f81..eddf8c9296a1 100644 --- a/include/editeng/svxacorr.hxx +++ b/include/editeng/svxacorr.hxx @@ -76,13 +76,14 @@ enum class ACFlags : sal_uInt32 { CorrectCapsLock = 0x00002000, // Correct accidental use of cAPS LOCK key TransliterateRTL = 0x00004000, // Transliterate RTL text ChgAngleQuotes = 0x00008000, // >>, << -> angle quotes in some languages + SetDOIAttr = 0x00010000, // Set DOIAttribut ChgWordLstLoad = 0x20000000, // Replacement list loaded CplSttLstLoad = 0x40000000, // Exception list for Capital letters Start loaded WordStartLstLoad = 0x80000000, // Exception list for Word Start loaded }; namespace o3tl { - template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 0xe000ffff> {}; + template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 0xe001ffff> {}; } enum class ACQuotes @@ -416,6 +417,9 @@ public: bool FnSetINetAttr( SvxAutoCorrDoc&, const OUString&, sal_Int32 nSttPos, sal_Int32 nEndPos, LanguageType eLang ); + bool FnSetDOIAttr( 
SvxAutoCorrDoc&, const OUString&, + sal_Int32 nSttPos, sal_Int32 nEndPos, + LanguageType eLang ); bool FnChgWeightUnderl( SvxAutoCorrDoc&, const OUString&, sal_Int32 nEndPos ); void FnCapitalStartSentence( SvxAutoCorrDoc&, const OUString&, bool bNormalPos, diff --git a/include/editeng/swafopt.hxx b/include/editeng/swafopt.hxx index 180ba6d1be67..71919383da96 100644 --- a/include/editeng/swafopt.hxx +++ b/include/editeng/swafopt.hxx @@ -109,6 +109,7 @@ struct EDITENG_DLLPUBLIC SvxSwAutoFormatFlags bool bChgAngleQuotes : 1; bool bChgWeightUnderl : 1; bool bSetINetAttr : 1; + bool bSetDOIAttr : 1; bool bSetBorder : 1; bool bCreateTable : 1; diff --git a/include/svl/urihelper.hxx b/include/svl/urihelper.hxx index 9f8588c97295..68843c17a85c 100644 --- a/include/svl/urihelper.hxx +++ b/include/svl/urihelper.hxx @@ -120,6 +120,11 @@ SVL_DLLPUBLIC OUString FindFirstURLInText(OUString const & rText, INetURLObject::EncodeMechanism eMechanism = INetURLObject::EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8); +SVL_DLLPUBLIC OUString FindFirstDOIInText(OUString const & rText, + sal_Int32 & rBegin, + sal_Int32 & rEnd, + CharClass const & rCharClass); + /** Remove any password component from both absolute and relative URLs. 
@ATT The current implementation will not remove a password from a diff --git a/officecfg/registry/schema/org/openoffice/Office/Common.xcs b/officecfg/registry/schema/org/openoffice/Office/Common.xcs index 9beda4a62c81..3bba44384b37 100644 --- a/officecfg/registry/schema/org/openoffice/Office/Common.xcs +++ b/officecfg/registry/schema/org/openoffice/Office/Common.xcs @@ -1335,6 +1335,16 @@ </info> <value>true</value> </prop> + <prop oor:name="SetDOIAttribute" oor:type="xs:boolean" oor:nillable="false"> + <!-- UIHints: Tools AutoCorrect/AutoFormat Options - DOI + recognition --> + <info> + <desc>Specifies if character strings which could represent a DOI + should be converted to a hyperlink.</desc> + <label>Detect DOI</label> + </info> + <value>true</value> + </prop> <prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" oor:nillable="false"> <!-- UIHints: Tools AutoCorrect/AutoFormat Options - Replace 1st... --> diff --git a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs index bc84344942f4..7f0f55de7147 100644 --- a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs +++ b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs @@ -4173,6 +4173,14 @@ </info> <value>true</value> </prop> + <prop oor:name="SetDOIAttribute" oor:type="xs:boolean" oor:nillable="false"> + <!-- UIHints: Tools AutoCorrect/AutoFormat Options - DOI citation recognition --> + <info> + <desc>Specifies whether character strings which could represent a DOI are converted to a hyperlink.</desc> + <label>Detect DOI</label> + </info> + <value>true</value> + </prop> <prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" oor:nillable="false"> <!-- UIHints: Tools AutoCorrect/AutoFormat Options - Replace 1st... 
-->
       <info>
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index eb5135cbe3c6..df9e5d5114b8 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -181,11 +181,14 @@ public:
 
     void testFindFirstURLInText();
 
+    void testFindFirstDOIInText();
+
     void testResolveIdnaHost();
 
     CPPUNIT_TEST_SUITE(Test);
     CPPUNIT_TEST(testNormalizedMakeRelative);
     CPPUNIT_TEST(testFindFirstURLInText);
+    CPPUNIT_TEST(testFindFirstDOIInText);
     CPPUNIT_TEST(testResolveIdnaHost);
     CPPUNIT_TEST(finish);
     CPPUNIT_TEST_SUITE_END();
@@ -398,6 +401,57 @@ void Test::testFindFirstURLInText() {
     }
 }
 
+void Test::testFindFirstDOIInText() {
+    struct Data {
+        char const * input;
+        char const * result;
+        sal_Int32 begin;
+        sal_Int32 end;
+    };
+    static Data const tests[] = {
+        { "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits
+        { "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters
+        { "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash
+        { "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric
+        { "doi:10.109/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi:10. is not between 4 and 9
+        { "doi:10.1234567890/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi:10. is not between 4 and 9
+        { "doi:10.1093/ajae/aaq063/", nullptr, 0, 0 }, // nothing after slash
+        { "doi:10.1093", nullptr, 0, 0 }, // no slash
+        { "doi:11.1093/ajae/aaq063", nullptr, 0, 0 }, // doesn't begin with doi:10.
+    };
+    CharClass charClass( m_context, LanguageTag( css::lang::Locale("en", "US", "")));
+    for (std::size_t i = 0; i < SAL_N_ELEMENTS(tests); ++i) {
+        OUString input(OUString::createFromAscii(tests[i].input));
+        sal_Int32 begin = 0;
+        sal_Int32 end = input.getLength();
+        OUString result(
+            URIHelper::FindFirstDOIInText(input, begin, end, charClass));
+        bool ok = tests[i].result == nullptr
+            ? (result.getLength() == 0 && begin == input.getLength()
+               && end == input.getLength())
+            : (result.equalsAscii(tests[i].result) && begin == tests[i].begin
+               && end == tests[i].end);
+        OString msg;
+        if (!ok) {
+            OStringBuffer buf;
+            buf.append(OString::Concat("\"")
+                + tests[i].input
+                + "\" -> ");
+            buf.append(tests[i].result == nullptr ? "none" : tests[i].result);
+            buf.append(" ("
+                + OString::number(tests[i].begin)
+                + ", "
+                + OString::number(tests[i].end)
+                + ")"
+                " != "
+                + OUStringToOString(result, RTL_TEXTENCODING_UTF8)
+                + " (" + OString::number(begin) + ", " + OString::number(end) +")");
+            msg = buf.makeStringAndClear();
+        }
+        CPPUNIT_ASSERT_MESSAGE(msg.getStr(), ok);
+    }
+}
+
 void Test::testResolveIdnaHost()
 {
     OUString input;
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 6f121fba56d9..0043b7883a87 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -745,6 +745,65 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
     return OUString();
 }
 
+/** Recognize a DOI citation "doi:10.<4-9 digits>/<suffix>" in a text range.
+    Succeeds only if all of [rBegin, rEnd) of rText is such a citation, and then
+    returns it as an "https://doi.org/..." URL, leaving rBegin and rEnd as is.
+    Else returns an empty string; rBegin = rEnd unless the range was rejected. */
+OUString URIHelper::FindFirstDOIInText(OUString const & rText,
+                                       sal_Int32 & rBegin,
+                                       sal_Int32 & rEnd,
+                                       CharClass const & rCharClass)
+{
+    // Reject ranges outside rText (also a negative rBegin) before slicing
+    if (rBegin < 0 || rBegin > rEnd || rEnd > rText.getLength())
+        return OUString();
+
+    sal_Int32 start = 7; // length of the "doi:10." prefix
+    sal_Int32 count = rEnd-rBegin;
+    OUString candidate(rText.subView(rBegin, count));
+    // Match with regex "doi:10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+"
+    if (candidate.startsWith("doi:10."))
+    {
+        bool flag = true;
+        sal_Int32 digit = 0; // digits seen so far, -1 once past the slash
+        for (sal_Int32 i=start; i<count; i++)
+        {
+            sal_Unicode c = candidate[i];
+            // Match 4 to 9 digits before slash
+            if (digit >= 0)
+            {
+                if ( digit<9 && rCharClass.isDigit(candidate,i) )
+                {
+                    digit++;
+                }
+                else if (c=='/' && digit>=4 && i<count-1)
+                {
+                    digit=-1;
+                }
+                else
+                {
+                    flag = false;
+                    break;
+                }
+            }
+            // Match [-._;()/:a-zA-Z0-9] after slash; the suffix must not end
+            // with a slash, and a backslash is not part of the character class
+            else if (!( rCharClass.isAlphaNumeric(candidate, i) || c == '.' || c == '-' || c=='_' ||
+                        c==';' || c=='(' || c==')' || (c=='/' && i<count-1) || c==':'))
+            {
+                flag = false;
+                break;
+            }
+        }
+        if (flag && digit==-1)
+        {
+            return candidate.replaceFirst("doi:","https://doi.org/");
+        }
+    }
+    rBegin = rEnd;
+    return OUString();
+}
+
 OUString URIHelper::removePassword(OUString const & rURI,
                                    INetURLObject::EncodeMechanism eEncodeMechanism,
                                    INetURLObject::DecodeMechanism eDecodeMechanism,
diff --git a/sw/inc/comcore.hxx b/sw/inc/comcore.hxx
index 2fda83a3cbc4..3e43536742d2 100644
--- a/sw/inc/comcore.hxx
+++ b/sw/inc/comcore.hxx
@@ -43,8 +43,9 @@
 #define STR_AUTOFMTREDL_DEL_MORELINES 20
 #define STR_AUTOFMTREDL_NON_BREAK_SPACE 21
 #define STR_AUTOFMTREDL_TRANSLITERATE_RTL 22
+#define STR_AUTOFMTREDL_DETECT_DOI 23
 // !!!!!!!!!!!!!!!!!!!!!!!!!! always set the correct end !!!!!!!!!!!!
-#define STR_AUTOFMTREDL_END 23 +#define STR_AUTOFMTREDL_END 24 #endif diff --git a/sw/inc/utlui.hrc b/sw/inc/utlui.hrc index 2d71c113757d..d5998e143514 100644 --- a/sw/inc/utlui.hrc +++ b/sw/inc/utlui.hrc @@ -48,7 +48,8 @@ const TranslateId RID_SHELLRES_AUTOFMTSTRS[] = NC_("RID_SHELLRES_AUTOFMTSTRS", "Set \"Bullet\" or \"Numbering\" Style"), NC_("RID_SHELLRES_AUTOFMTSTRS", "Combine paragraphs"), NC_("RID_SHELLRES_AUTOFMTSTRS", "Add non breaking space"), - NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old Hungarian script") + NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old Hungarian script"), + NC_("RID_SHELLRES_AUTOFMTSTRS", "DOI citation recognition") }; #endif diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx index 0f5d4cd307e9..b63f19b24fac 100644 --- a/sw/source/core/edit/autofmt.cxx +++ b/sw/source/core/edit/autofmt.cxx @@ -2189,7 +2189,11 @@ void SwAutoFormat::AutoCorrect(TextFrameIndex nPos) ( m_aFlags.bSetINetAttr && (nPos == TextFrameIndex(pText->getLength()) || IsSpace((*pText)[sal_Int32(nPos)])) && SetRedlineText( STR_AUTOFMTREDL_DETECT_URL ) && - pATst->FnSetINetAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang))) + pATst->FnSetINetAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)) || + ( m_aFlags.bSetDOIAttr && + (nPos == TextFrameIndex(pText->getLength()) || IsSpace((*pText)[sal_Int32(nPos)])) && + SetRedlineText( STR_AUTOFMTREDL_DETECT_DOI ) && + pATst->FnSetDOIAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang))) { nPos = m_pCurTextFrame->MapModelToViewPos(*m_aDelPam.GetPoint()); } @@ -2779,7 +2783,8 @@ void SwEditShell::AutoFormatBySplitNode() SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect(); if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord | ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL | - ACFlags::ChgToEnEmDash | 
ACFlags::SetINetAttr | ACFlags::Autocorrect )) + ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect | + ACFlags::SetDOIAttr )) pACorr = nullptr; if( pACorr ) diff --git a/sw/source/uibase/docvw/edtwin.cxx b/sw/source/uibase/docvw/edtwin.cxx index a15dddf1dec6..5bcbd4a9c843 100644 --- a/sw/source/uibase/docvw/edtwin.cxx +++ b/sw/source/uibase/docvw/edtwin.cxx @@ -2570,7 +2570,8 @@ KEYINPUT_CHECKTABLE_INSDEL: pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord | ACFlags::ChgOrdinalNumber | ACFlags::AddNonBrkSpace | ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | - ACFlags::Autocorrect | ACFlags::TransliterateRTL ) && + ACFlags::Autocorrect | ACFlags::TransliterateRTL | + ACFlags::SetDOIAttr ) && '\"' != aCh && '\'' != aCh && '*' != aCh && '_' != aCh ) { @@ -2608,7 +2609,7 @@ KEYINPUT_CHECKTABLE_INSDEL: pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL | ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | - ACFlags::Autocorrect ) && + ACFlags::Autocorrect | ACFlags::SetDOIAttr ) && !rSh.HasReadonlySel() ) { FlushInBuffer(); diff --git a/sw/source/uibase/shells/textsh.cxx b/sw/source/uibase/shells/textsh.cxx index 8ecd18555c0e..e7af6d5f3275 100644 --- a/sw/source/uibase/shells/textsh.cxx +++ b/sw/source/uibase/shells/textsh.cxx @@ -167,7 +167,8 @@ void SwTextShell::ExecInsert(SfxRequest &rReq) && pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord | ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL | - ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ) ) + ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect | + ACFlags::SetDOIAttr ) ) { rSh.AutoCorrect( *pACorr, cIns ); } diff --git a/sw/source/uibase/wrtsh/wrtsh1.cxx b/sw/source/uibase/wrtsh/wrtsh1.cxx index 65f434d45961..0cdd279df379 100644 --- a/sw/source/uibase/wrtsh/wrtsh1.cxx +++ 
b/sw/source/uibase/wrtsh/wrtsh1.cxx @@ -173,7 +173,8 @@ static SvxAutoCorrect* lcl_IsAutoCorr() SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect(); if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord | ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL | - ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect )) + ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect | + ACFlags::SetDOIAttr )) pACorr = nullptr; return pACorr; }