[incubator-annotator] 03/04: Use XPaths to serialise Ranges

gerben Fri, 15 May 2020 09:57:28 -0700

This is an automated email from the ASF dual-hosted git repository.

gerben pushed a commit to branch dom-tests
in repository https://gitbox.apache.org/repos/asf/incubator-annotator.git


commit e79b5ab2da6c3d281226564a20c837ea306c1901
Author: Gerben <[email protected]>
AuthorDate: Fri May 15 18:34:47 2020 +0200

    Use XPaths to serialise Ranges
    
    Looks like I reinvented the wheel:
    https://github.com/openannotation/xpath-range
---
 packages/dom/test/text-quote-match.ts | 87 +++++++++++++++++++++++++++++------
 1 file changed, 72 insertions(+), 15 deletions(-)

diff --git a/packages/dom/test/text-quote-match.ts b/packages/dom/test/text-quote-match.ts
index de821bc..2f43600 100644
--- a/packages/dom/test/text-quote-match.ts
+++ b/packages/dom/test/text-quote-match.ts
@@ -32,16 +32,16 @@ const testCases: {
   }
 } = {
   "simple": {
-    html: `<!doctype html><html><head></head><body><b>lorem ipsum dolor amet 
yada yada</b></body></html>`,
+    html: `<b>lorem ipsum dolor amet yada yada</b>`,
     selector: {
       type: 'TextQuoteSelector',
       exact: 'dolor am',
     },
     expected: [
       {
-        startContainer: [1, 1, 0, 0],
+        startContainer: '//b/text()',
         startOffset: 12,
-        endContainer: [1, 1, 0, 0],
+        endContainer: '//b/text()',
         endOffset: 20,
       },
     ]
@@ -54,29 +54,48 @@ describe('createTextQuoteSelectorMatcher', () => {
       const doc = domParser.parseFromString(html, 'text/html');
       const matcher = createTextQuoteSelectorMatcher(selector);
       const matches = await asyncIterableToArray(matcher(doc.body));
-      assert.deepEqual(matches.map(rangeToInfo), expected);
+      assert.deepEqual(
+        matches.map(range => canonicalRangeSerialisation(range)),
+        expected.map(info => canonicalRangeSerialisation(info, doc)),
+      );
     });
   }
 });
 
-// RangeInfo encodes a Range’s start&end containers as their positions in the DOM tree.
+// RangeInfo encodes a Range’s start&end containers as XPaths.
 type RangeInfo = {
-  startContainer: number[],
+  startContainer: string,
   startOffset: number,
-  endContainer: number[],
+  endContainer: string,
   endOffset: number,
 };
 
-function rangeToInfo(range: Range | RangeInfo): RangeInfo {
-  if (!('collapsed' in range)) return range;
+// Serialise a Range, or re-serialise a RangeInfo, into a RangeInfo whose
+// containers are the XPaths produced by createXPath, so that the actual and
+// expected values of a test can be compared with deepEqual.
+function canonicalRangeSerialisation(range: Range): RangeInfo
+function canonicalRangeSerialisation(range: RangeInfo, doc: Document): RangeInfo
+function canonicalRangeSerialisation(range: Range | RangeInfo, doc?: Document): RangeInfo {
+  if (!('collapsed' in range)) {
+    // range is already serialised; re-serialise to ensure it is canonical.
+    // Its XPaths can only be resolved against a document, so fail clearly if
+    // none was passed instead of handing undefined to evaluateXPathOne.
+    if (doc === undefined)
+      throw new TypeError('A Document is required to re-serialise a RangeInfo');
+    return {
+      ...range,
+      startContainer: createXPath(evaluateXPathOne(doc, range.startContainer)),
+      endContainer: createXPath(evaluateXPathOne(doc, range.endContainer)),
+    };
+  }
   return {
-    startContainer: pathToNode(range.startContainer),
+    startContainer: createXPath(range.startContainer),
     startOffset: range.startOffset,
-    endContainer: pathToNode(range.endContainer),
+    endContainer: createXPath(range.endContainer),
     endOffset: range.endOffset,
   };
 }
 
+// Convert a serialised RangeInfo back into a Range by resolving its XPaths
+// within the given document; a Range that is passed in is returned unchanged.
+function infoToRange(info: RangeInfo | Range, doc: Document): Range {
+  if ('collapsed' in info) return info;
+  // Create the Range from `doc`, the document the XPaths are resolved in,
+  // rather than from the global `document`.
+  const range = doc.createRange();
+  range.setStart(evaluateXPathOne(doc, info.startContainer), info.startOffset);
+  // The end boundary must come from the end fields; reusing the start fields
+  // would always produce a collapsed range.
+  range.setEnd(evaluateXPathOne(doc, info.endContainer), info.endOffset);
+  return range;
+}
+
 async function asyncIterableToArray<T>(source: AsyncIterable<T>): Promise<T[]> 
{
   const values = [];
   for await (const value of source) {
@@ -85,12 +104,50 @@ async function asyncIterableToArray<T>(source: AsyncIterable<T>): Promise<T[]> {
   return values;
 }
 
-// Return the array of child indexes that leads from the root node to the given node.
-function pathToNode(node: Node): number[] {
-  const path: number[] = [];
+// Return an XPath expression for the given node.
+function createXPath(node: Node): string {
+  // Wrap the actual function with a self-test: evaluating the generated XPath
+  // must yield exactly the node it was created for.
+  const result = _createXPath(node);
+  try {
+    const selfCheck = evaluateXPathAll(node.ownerDocument || node as Document, result);
+    assert.deepEqual(selfCheck, [node]);
+  } catch (err) {
+    // Reached when the evaluation throws or when the result is not [node].
+    assert.fail(`Test suite itself created an incorrect XPath: '${result}'`);
+  }
+  return result;
+}
+// Build an XPath for the given node by walking up through its ancestors until
+// an element is reached. That element is addressed by its lower-cased tag
+// name and, when several elements share the name, by its position among all
+// of them; every non-element step below it is addressed by node type and
+// position among its siblings.
+function _createXPath(node: Node): string {
+  let path = '';
   while (node.parentNode !== null) {
-    path.unshift([...node.parentNode.childNodes].indexOf(node as ChildNode));
+    if (node.nodeType === Node.ELEMENT_NODE) {
+      const name = (node as Element).tagName.toLowerCase();
+      const matchingElements = evaluateXPathAll(node.ownerDocument || node as Document, `//${name}`);
+      // Parenthesise before indexing: `//b[2]` selects every <b> that is the
+      // second <b> child of its parent, whereas `(//b)[2]` selects the second
+      // <b> of the whole document, which is what indexOf computed.
+      if (matchingElements.length > 1)
+        return `(//${name})[${matchingElements.indexOf(node) + 1}]${path}`;
+      else
+        return `//${name}${path}`;
+    }
+    const xpathNodeTypes = {
+      [Node.COMMENT_NODE]: 'comment',
+      [Node.TEXT_NODE]: 'text',
+      [Node.PROCESSING_INSTRUCTION_NODE]: 'processing-instruction',
+    };
+    const nodeType = xpathNodeTypes[node.nodeType] || 'node';
+    // A positional predicate counts only the nodes selected by the node test,
+    // so the index must be taken among the siblings that test matches (all
+    // siblings for `node()`), not among all child nodes.
+    const candidates = [...node.parentNode.childNodes].filter(
+      sibling => nodeType === 'node' || sibling.nodeType === node.nodeType,
+    );
+    path = `/${nodeType}()[${candidates.indexOf(node as ChildNode) + 1}]` + path;
     node = node.parentNode;
   }
   return path;
 }
+
+// Evaluate the XPath with the document as context node and return every node
+// of the resulting ordered snapshot as an array.
+function evaluateXPathAll(doc: Document, xpath: string): Node[] {
+  const result = doc.evaluate(xpath, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);
+  const nodes: Node[] = [];
+  for (let i = 0; i < result.snapshotLength; i++) {
+    // snapshotItem is typed as possibly null; skipping null keeps the
+    // returned array a genuine Node[].
+    const item = result.snapshotItem(i);
+    if (item !== null) nodes.push(item);
+  }
+  return nodes;
+}
+
+// Evaluate the XPath and return its single matching node; the assertion
+// fails when the XPath matches any number of nodes other than one.
+function evaluateXPathOne(doc: Document, xpath: string): Node {
+  const nodes = evaluateXPathAll(doc, xpath);
+  assert.equal(nodes.length, 1,
+    `Test suite contains XPath with ${nodes.length} results instead of 1: '${xpath}'`
+  );
+  return nodes[0];
+}

[incubator-annotator] 03/04: Use XPaths to serialise Ranges

Reply via email to