This is an automated email from the ASF dual-hosted git repository. gerben pushed a commit to branch dom-tests in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git
commit b3734c0d7b532fb3ba58b31d048a75c23b8dc39e Author: Gerben <[email protected]> AuthorDate: Mon May 25 19:48:06 2020 +0200 Some fixes to satisfy tests Always give a prefix and suffix, as recommended in the spec: “Each TextQuoteSelector SHOULD have exactly 1 prefix property” …so now it is an empty string when no prefix/suffix is needed. I am not sure if this is valuable or a waste a bytes. I’d be equally happy to revert this behaviour. Fix mistakes leading to needless prefix or suffix in some cases, and a lack of them when selecting the first or last characters (partly regressions from c94ccda and 8b29fec, or already faulty before that). --- packages/dom/src/text-quote/describe.ts | 76 ++++++++++++--------------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts index 57a587c..819fa12 100644 --- a/packages/dom/src/text-quote/describe.ts +++ b/packages/dom/src/text-quote/describe.ts @@ -87,66 +87,46 @@ async function calculateContextForDisambiguation( continue; } - // Determine how many prefix characters are shared. - const prefixLength = overlapRight( - text.substring(0, matchStartIndex), + // Count how many characters before & after them the false match and target have in common. + const sufficientPrefixLength = charactersNeededToBeUnique( text.substring(0, startIndex), + text.substring(0, matchStartIndex), + true, ); - - // Determine how many suffix characters are shared. - const suffixLength = overlap( - text.substring(matchEndIndex), + const sufficientSuffixLength = charactersNeededToBeUnique( text.substring(endIndex), + text.substring(matchEndIndex), + false, ); - - // Record the affix lengths that would have precluded this match. - affixLengthPairs.push([prefixLength + 1, suffixLength + 1]); - } - - // Construct and return an unambiguous selector. - let prefix, suffix; - if (affixLengthPairs.length) { - const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs); - - if (prefixLength > 0 && startIndex > 0) { - prefix = text.substring(startIndex - prefixLength, startIndex); - } - - if (suffixLength > 0 && endIndex < text.length) { - suffix = text.substring(endIndex, endIndex + suffixLength); - } + affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]); } + // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters + // for prefix and suffix combined. + const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs); + const prefix = text.substring(startIndex - prefixLength, startIndex); + const suffix = text.substring(endIndex, endIndex + suffixLength); return { prefix, suffix }; } -function overlap(text1: string, text2: string) { - let count = 0; - - while (count < text1.length && count < text2.length) { - const c1 = text1[count]; - const c2 = text2[count]; - if (c1 !== c2) break; - count++; - } - - return count; -} - -function overlapRight(text1: string, text2: string) { - let count = 0; - - while (count < text1.length && count < text2.length) { - const c1 = text1[text1.length - 1 - count]; - const c2 = text2[text2.length - 1 - count]; - if (c1 !== c2) break; - count++; - } - - return count; +function charactersNeededToBeUnique(target: string, impostor: string, reverse: boolean = false) { + // Count how many characters the two strings have in common. + let overlap = 0; + while (reverse + ? target[target.length - 1 - overlap] === impostor[impostor.length - 1 - overlap] + : target[overlap] === impostor[overlap] + ) + overlap++; + if (overlap === target.length) + return Infinity; // (no substring of target can make it distinguishable from its impostor) + else + return overlap + 1; } function minimalSolution(requirements: Array<[number, number]>): [number, number] { + // Ensure we try solutions with an empty prefix or suffix. + requirements.push([0, 0]); + // Build all the pairs and order them by their sums. const pairs = requirements.flatMap(l => requirements.map<[number, number]>(r => [l[0], r[1]])); pairs.sort((a, b) => a[0] + a[1] - (b[0] + b[1]));
