This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git

commit 8f2f4ea8efb1a66041da960078a13fec134c16f8
Author: Gerben <[email protected]>
AuthorDate: Mon May 25 20:02:41 2020 +0200

    WIP
---
 packages/dom/src/text-quote/describe.ts | 111 +++++++++++++++++++-------------
 1 file changed, 65 insertions(+), 46 deletions(-)

diff --git a/packages/dom/src/text-quote/describe.ts b/packages/dom/src/text-quote/describe.ts
index 819fa12..2b85560 100644
--- a/packages/dom/src/text-quote/describe.ts
+++ b/packages/dom/src/text-quote/describe.ts
@@ -23,16 +23,24 @@ import seek from 'dom-seek';
 import { TextQuoteSelector } from '../../../selector/src';
 import { DomScope } from '../types';
 import { ownerDocument, rangeFromScope } from '../scope';
-import { createTextQuoteSelectorMatcher } from './match';
 
 function firstTextNodeInRange(range: Range): Text {
-  const { startContainer } = range;
-
-  if (isTextNode(startContainer)) return startContainer;
-
-  const root = range.commonAncestorContainer;
-  const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
-  return iter.nextNode() as Text;
+  // Find the first text node inside the range.
+  const iter = document.createNodeIterator(
+    range.commonAncestorContainer,
+    NodeFilter.SHOW_TEXT,
+    {
+      acceptNode(node: Text) {
+        // Only reveal nodes within the range; and skip any empty text nodes.
+        return range.intersectsNode(node) && node.length > 0
+          ? NodeFilter.FILTER_ACCEPT
+          : NodeFilter.FILTER_REJECT
+      },
+    },
+  );
+  const node = iter.nextNode() as Text | null;
+  if (node === null) throw new Error('Range contains no text nodes');
+  return node;
 }
 
 export async function describeTextQuote(
@@ -43,59 +51,50 @@ export async function describeTextQuote(
 
   const result: TextQuoteSelector = { type: 'TextQuoteSelector', exact };
 
-  const { prefix, suffix } = await calculateContextForDisambiguation(range, result, scope);
+  const { prefix, suffix } = calculateContextForDisambiguation(range, result, scope);
   result.prefix = prefix;
   result.suffix = suffix;
 
   return result
 }
 
-async function calculateContextForDisambiguation(
+function calculateContextForDisambiguation(
   range: Range,
   selector: TextQuoteSelector,
   scope: DomScope,
-): Promise<{ prefix?: string, suffix?: string }> {
-  const scopeAsRange = rangeFromScope(scope);
-  const root = scopeAsRange.commonAncestorContainer;
-  const text = scopeAsRange.toString();
-
-  const matcher = createTextQuoteSelectorMatcher(selector);
-
-  const iter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
-
-  const startNode = firstTextNodeInRange(range);
-  const startIndex =
-    isTextNode(range.startContainer)
-      ? seek(iter, startNode) + range.startOffset
-      : seek(iter, startNode);
-  const endIndex = startIndex + selector.exact.length;
+): { prefix?: string, suffix?: string } {
+  const exactText = selector.exact;
+  const scopeText = rangeFromScope(scope).toString();
+  const targetStartIndex = getRangeTextPosition(range, scope);
+  const targetEndIndex = targetStartIndex + exactText.length;
+
+  // Find all matches of the text in the scope.
+  const stringMatches: number[] = [];
+  let fromIndex = 0;
+  while (fromIndex < scopeText.length) {
+    const matchIndex = scopeText.indexOf(exactText, fromIndex);
+    if (matchIndex === -1) break;
+    stringMatches.push(matchIndex);
+    fromIndex = matchIndex + 1;
+  }
 
+  // Count for each undesired match the required prefix and suffix lengths, such that either of them
+  // would have invalidated the match.
   const affixLengthPairs: Array<[number, number]> = [];
-
-  for await (const match of matcher(scopeAsRange)) {
-    const matchIter = document.createNodeIterator(root, NodeFilter.SHOW_TEXT);
-
-    const matchStartNode = firstTextNodeInRange(match);
-    const matchStartIndex =
-      isTextNode(match.startContainer)
-        ? seek(matchIter, matchStartNode) + match.startOffset
-        : seek(matchIter, matchStartNode);
-    const matchEndIndex = matchStartIndex + match.toString().length;
-
-    // If the match is the same as the input range, continue.
-    if (matchStartIndex === startIndex || matchEndIndex === endIndex) {
+  for (const matchStartIndex of stringMatches) {
+    // Skip the found match if it is the actual target.
+    if (matchStartIndex === targetStartIndex)
       continue;
-    }
 
     // Count how many characters before & after them the false match and target have in common.
     const sufficientPrefixLength = charactersNeededToBeUnique(
-      text.substring(0, startIndex),
-      text.substring(0, matchStartIndex),
+      scopeText.substring(0, targetStartIndex),
+      scopeText.substring(0, matchStartIndex),
       true,
     );
     const sufficientSuffixLength = charactersNeededToBeUnique(
-      text.substring(endIndex),
-      text.substring(matchEndIndex),
+      scopeText.substring(targetStartIndex + exactText.length),
+      scopeText.substring(matchStartIndex + exactText.length),
       false,
     );
     affixLengthPairs.push([sufficientPrefixLength, sufficientSuffixLength]);
@@ -104,8 +103,8 @@ async function calculateContextForDisambiguation(
   // Find the prefix and suffix that would invalidate all mismatches, using the minimal characters
   // for prefix and suffix combined.
   const [prefixLength, suffixLength] = minimalSolution(affixLengthPairs);
-  const prefix = text.substring(startIndex - prefixLength, startIndex);
-  const suffix = text.substring(endIndex, endIndex + suffixLength);
+  const prefix = scopeText.substring(targetStartIndex - prefixLength, targetStartIndex);
+  const suffix = scopeText.substring(targetEndIndex, targetEndIndex + suffixLength);
   return { prefix, suffix };
 }
 
@@ -143,6 +142,26 @@ function minimalSolution(requirements: Array<[number, number]>): [number, number
   return pairs[pairs.length - 1];
 }
 
+function getRangeTextPosition(range: Range, scope: DomScope): number {
+  const scopeAsRange = rangeFromScope(scope);
+  const iter = document.createNodeIterator(
+    scopeAsRange.commonAncestorContainer,
+    NodeFilter.SHOW_TEXT,
+    {
+      acceptNode(node: Text) {
+        // Only reveal nodes within the range
+        return scopeAsRange.intersectsNode(node)
+          ? NodeFilter.FILTER_ACCEPT
+          : NodeFilter.FILTER_REJECT
+      },
+    },
+  );
+  if (isTextNode(range.startContainer))
+    return seek(iter, range.startContainer) + range.startOffset;
+  else
+    return seek(iter, firstTextNodeInRange(range));
+}
+
 function isTextNode(node: Node): node is Text {
-  return node.nodeType === Node.TEXT_NODE
+  return node.nodeType === Node.TEXT_NODE;
 }

Reply via email to