commit 2a1454a8330c1b687b7d814b3366faf9a0645792
Author: Stephan Witt <[email protected]>
Date:   Sun Dec 14 16:29:12 2025 +0100

    #13261 fix wrong locations and lengths of misspelled text portions of Apple 
speller
    
    - change to an interface compatible with UCS4 docstrings
    - check for strings with Unicode characters encoded in UTF-16 with multiple
      code units (surrogate pairs)
    - adjust UTF-16 code unit locations and lengths to character locations and
      lengths compatible with UCS4 strings
    
    (cherry picked from commit 69156b01f855adc7b50744df8bfe7fb1bab6722e)
---
 src/AppleSpellChecker.cpp  | 10 ++---
 src/support/AppleSpeller.h | 10 ++---
 src/support/AppleSpeller.m | 94 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/src/AppleSpellChecker.cpp b/src/AppleSpellChecker.cpp
index 0a82d38b32..d7b801dfc9 100644
--- a/src/AppleSpellChecker.cpp
+++ b/src/AppleSpellChecker.cpp
@@ -84,7 +84,7 @@ SpellChecker::Result AppleSpellChecker::check(WordLangTuple 
const & word,
        if (!hasDictionary(word.lang()))
                return NO_DICTIONARY;
 
-       string const word_str = to_utf8(word.word());
+       docstring const word_str = word.word();
        string const lang = d->languageMap[word.lang()->lang()];
 
        vector<WordLangTuple>::const_iterator it = docdict.begin();
@@ -114,7 +114,7 @@ void AppleSpellChecker::advanceChangeNumber()
 // add to personal dictionary
 void AppleSpellChecker::insert(WordLangTuple const & word)
 {
-       string const word_str = to_utf8(word.word());
+       docstring const word_str = word.word();
        AppleSpeller_learn(d->speller, word_str.c_str());
        LYXERR(Debug::GUI, "learn word: \"" << word.word() << "\"");
        advanceChangeNumber();
@@ -124,7 +124,7 @@ void AppleSpellChecker::insert(WordLangTuple const & word)
 // remove from personal dictionary
 void AppleSpellChecker::remove(WordLangTuple const & word)
 {
-       string const word_str = to_utf8(word.word());
+       docstring const word_str = word.word();
        AppleSpeller_unlearn(d->speller, word_str.c_str());
        LYXERR(Debug::GUI, "unlearn word: \"" << word.word() << "\"");
        advanceChangeNumber();
@@ -134,7 +134,7 @@ void AppleSpellChecker::remove(WordLangTuple const & word)
 // ignore for session
 void AppleSpellChecker::accept(WordLangTuple const & word)
 {
-       string const word_str = to_utf8(word.word());
+       docstring const word_str = word.word();
        AppleSpeller_ignore(d->speller, word_str.c_str());
        LYXERR(Debug::GUI, "ignore word: \"" << word.word() << "\"");
        advanceChangeNumber();
@@ -145,7 +145,7 @@ void AppleSpellChecker::suggest(WordLangTuple const & wl,
        docstring_list & suggestions)
 {
        suggestions.clear();
-       string const word_str = to_utf8(wl.word());
+       docstring const word_str = wl.word();
        size_t num = AppleSpeller_makeSuggestion(d->speller,
                                        word_str.c_str(), 
wl.lang()->code().c_str());
        for (size_t i = 0; i < num; i++) {
diff --git a/src/support/AppleSpeller.h b/src/support/AppleSpeller.h
index 5910fba142..62f3fe2de6 100644
--- a/src/support/AppleSpeller.h
+++ b/src/support/AppleSpeller.h
@@ -28,12 +28,12 @@ typedef struct AppleSpellerRec * AppleSpeller ;
 AppleSpeller newAppleSpeller(void);
 void freeAppleSpeller(AppleSpeller speller);
 
-SpellCheckResult AppleSpeller_check(AppleSpeller speller, const char * word, 
const char * lang);
-void AppleSpeller_ignore(AppleSpeller speller, const char * word);
-size_t AppleSpeller_makeSuggestion(AppleSpeller speller, const char * word, 
const char * lang);
+SpellCheckResult AppleSpeller_check(AppleSpeller speller, const wchar_t * 
word, const char * lang);
+void AppleSpeller_ignore(AppleSpeller speller, const wchar_t * word);
+size_t AppleSpeller_makeSuggestion(AppleSpeller speller, const wchar_t * word, 
const char * lang);
 const char * AppleSpeller_getSuggestion(AppleSpeller speller, size_t pos);
-void AppleSpeller_learn(AppleSpeller speller, const char * word);
-void AppleSpeller_unlearn(AppleSpeller speller, const char * word);
+void AppleSpeller_learn(AppleSpeller speller, const wchar_t * word);
+void AppleSpeller_unlearn(AppleSpeller speller, const wchar_t * word);
 int AppleSpeller_hasLanguage(AppleSpeller speller, const char * lang);
 int AppleSpeller_numMisspelledWords(AppleSpeller speller);
 void AppleSpeller_misspelledWord(AppleSpeller speller, int index, int * start, 
int * length);
diff --git a/src/support/AppleSpeller.m b/src/support/AppleSpeller.m
index ce8b7a96b4..593999812b 100644
--- a/src/support/AppleSpeller.m
+++ b/src/support/AppleSpeller.m
@@ -12,6 +12,8 @@
 
 #import <AvailabilityMacros.h>
 
+#include <wchar.h>
+
 #include "support/AppleSpeller.h"
 
 typedef struct AppleSpellerRec {
@@ -48,9 +50,18 @@ void freeAppleSpeller(AppleSpeller speller)
 }
 
 
-static NSString * toString(const char * word)
+/* Converts the NUL-terminated UTF-8 C string lang into a new NSString.
+ * The string is created with alloc/init, so the caller owns the result. */
+static NSString * toString(const char * lang)
+{
+       NSUInteger const nbytes = strlen(lang);
+
+       return [[NSString alloc] initWithBytes:lang
+               length:nbytes
+               encoding:NSUTF8StringEncoding];
+}
+
+
+/* Builds a new NSString from length wchar_t elements starting at text.
+ * The buffer is decoded as UTF-32 in the byte order of the host. The string
+ * is created with alloc/init, so the caller owns the result. */
+NSString * wcharToString(const wchar_t* text, NSUInteger length)
 {
-       return [[NSString alloc] initWithBytes:word length:strlen(word) 
-encoding:NSUTF8StringEncoding];
+       NSStringEncoding encoding = NSUTF32BigEndianStringEncoding;
+
+       if (NSHostByteOrder() == NS_LittleEndian)
+               encoding = NSUTF32LittleEndianStringEncoding;
+
+       return [[NSString alloc] initWithBytes:text
+               length:(length * sizeof(wchar_t))
+               encoding:encoding];
 }
 
 
@@ -77,22 +88,70 @@ static NSString * toLanguage(AppleSpeller speller, const 
char * lang)
 }
 
 
-SpellCheckResult AppleSpeller_check(AppleSpeller speller, const char * word, 
const char * lang)
+/* Tells whether two consecutive UTF-16 code units form a surrogate pair:
+ * curr must be a high (leading) surrogate in 0xD800..0xDBFF and next a low
+ * (trailing) surrogate in 0xDC00..0xDFFF. */
+BOOL surrorate(unichar curr, unichar next) {
+       if (curr < 0xD800 || curr > 0xDBFF)
+               return NO;
+       if (next < 0xDC00 || next > 0xDFFF)
+               return NO;
+       return YES;
+}
+
+
+/* Counts the UTF-16 surrogate pairs whose high (leading) half lies in the
+ * code unit interval [from, to) of text. Only the first length code units
+ * of text are inspected. */
+static NSUInteger countSurrogatePairs(NSString * text, NSUInteger from,
+       NSUInteger to, NSUInteger length)
+{
+       NSUInteger pairs = 0;
+       NSUInteger k;
+
+       for (k = from; k < to && k + 1 < length; k++) {
+               if (surrorate([text characterAtIndex:k],
+                               [text characterAtIndex:k + 1])) {
+                       pairs++;
+                       k++;    /* step over the low half of the pair */
+               }
+       }
+       return pairs;
+}
+
+
+/* Converts misspelled ranges expressed in UTF-16 code units of text into
+ * ranges expressed in characters, where every surrogate pair counts as one
+ * character (the positions a UCS4 string would use).
+ *
+ * misspelled  NSValue-wrapped NSRange objects in code units of text
+ * text        the checked string
+ * length      number of code units of text to consider
+ *
+ * Returns an autoreleased array holding one adjusted NSRange value per input
+ * range, in the same order. An empty input yields an empty array.
+ *
+ * Every range is converted on its own, so a range that ends exactly at the
+ * end of the text is reported too, and a surrogate pair in the last two code
+ * units is counted. */
+NSArray * AppleSpeller_adjustPositions(NSArray * misspelled,
+       NSString * text, NSUInteger length)
+{
+       NSUInteger rcount = [misspelled count];
+       NSMutableArray * result = [NSMutableArray arrayWithCapacity:rcount];
+       NSUInteger scanned = 0;   /* code units examined so far */
+       NSUInteger pairs = 0;     /* surrogate pairs found in [0, scanned) */
+       NSUInteger r;
+
+       /* never read past the end of the string */
+       if (length > [text length])
+               length = [text length];
+
+       for (r = 0; r < rcount; r++) {
+               NSRange range = [[misspelled objectAtIndex:r] rangeValue];
+               NSUInteger rstart = range.location;
+               NSUInteger rend   = range.location + range.length;
+
+               if (rstart > length)
+                       rstart = length;
+               if (rend > length)
+                       rend = length;
+               if (rstart < scanned) {
+                       /* ranges are expected in ascending order; restart
+                        * the scan if this one lies before the previous */
+                       scanned = 0;
+                       pairs = 0;
+               }
+
+               /* pairs before the range shift its location to the left */
+               pairs += countSurrogatePairs(text, scanned, rstart, length);
+               /* pairs inside the range shorten its length */
+               NSUInteger inside = countSurrogatePairs(text, rstart, rend, length);
+
+               NSRange adjusted = NSMakeRange(rstart - pairs,
+                       rend - rstart - inside);
+               [result addObject:[NSValue valueWithRange:adjusted]];
+
+               pairs += inside;
+               scanned = rend;
+       }
+       return result;
+}
+
+
+SpellCheckResult AppleSpeller_check(AppleSpeller speller,
+       const wchar_t * word, const char * lang)
 {
        if (!speller->checker || !lang || !word)
                return SPELL_CHECK_FAILED;
 
        NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
-       NSString * word_ = toString(word);
+       NSUInteger wlength = wcslen(word);
+       NSString * word_ = wcharToString(word, wlength);
        NSString * lang_ = toString(lang);
        SpellCheckResult result = SPELL_CHECK_FAILED;
-       int start = 0;
-       int length = [word_ length];
+       NSUInteger start = 0;
+       NSUInteger ulength = [word_ length];
 
        [speller->misspelled release];
        speller->misspelled = nil;
+       BOOL surrogates = ulength > wlength;
 
-       while (result == SPELL_CHECK_FAILED && start < length) {
+       while (result == SPELL_CHECK_FAILED && start < ulength) {
                NSRange match = [speller->checker
                        checkSpellingOfString:word_
                        startingAt:start
@@ -117,6 +176,11 @@ SpellCheckResult AppleSpeller_check(AppleSpeller speller, 
const char * word, con
                        start = match.location + match.length + 1;
                }
        }
+       if ([speller->misspelled count] > 0 && surrogates) {
+               NSArray * misspelled = 
AppleSpeller_adjustPositions(speller->misspelled, word_, ulength);
+               [speller->misspelled release];
+               speller->misspelled = [[NSArray arrayWithArray:misspelled] 
retain];
+       }
 
        [word_ release];
        [lang_ release];
@@ -126,10 +190,10 @@ SpellCheckResult AppleSpeller_check(AppleSpeller speller, 
const char * word, con
 }
 
 
-void AppleSpeller_ignore(AppleSpeller speller, const char * word)
+void AppleSpeller_ignore(AppleSpeller speller, const wchar_t * word)
 {
        NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
-       NSString * word_ = toString(word);
+       NSString * word_ = wcharToString(word, wcslen(word));
 
        [speller->checker ignoreWord:word_ 
inSpellDocumentWithTag:(speller->doctag)];
 
@@ -138,13 +202,13 @@ void AppleSpeller_ignore(AppleSpeller speller, const char 
* word)
 }
 
 
-size_t AppleSpeller_makeSuggestion(AppleSpeller speller, const char * word, 
const char * lang)
+size_t AppleSpeller_makeSuggestion(AppleSpeller speller, const wchar_t * word, 
const char * lang)
 {
        if (!speller->checker || !word || !lang)
                return 0;
 
        NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
-       NSString * word_ = toString(word);
+       NSString * word_ = wcharToString(word, wcslen(word));
        NSString * lang_ = toString(lang);
        NSArray * result ;
 
@@ -177,10 +241,10 @@ const char * AppleSpeller_getSuggestion(AppleSpeller 
speller, size_t pos)
 }
 
 
-void AppleSpeller_learn(AppleSpeller speller, const char * word)
+void AppleSpeller_learn(AppleSpeller speller, const wchar_t * word)
 {
        NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
-       NSString * word_ = toString(word);
+       NSString * word_ = wcharToString(word, wcslen(word));
 
        if ([NSSpellChecker instancesRespondToSelector:@selector(learnWord:)])
                [speller->checker learnWord:word_];
@@ -190,10 +254,10 @@ void AppleSpeller_learn(AppleSpeller speller, const char 
* word)
 }
 
 
-void AppleSpeller_unlearn(AppleSpeller speller, const char * word)
+void AppleSpeller_unlearn(AppleSpeller speller, const wchar_t * word)
 {
        NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];
-       NSString * word_ = toString(word);
+       NSString * word_ = wcharToString(word, wcslen(word));
 
        if ([NSSpellChecker instancesRespondToSelector:@selector(unlearnWord:)])
                [speller->checker unlearnWord:word_];
-- 
lyx-cvs mailing list
[email protected]
https://lists.lyx.org/mailman/listinfo/lyx-cvs

Reply via email to