Author: tmill
Date: Tue Nov  5 15:57:22 2013
New Revision: 1539040

URL: http://svn.apache.org/r1539040
Log:
Fixes CTAKES-257. Implements two different forms - Dependency Word (DW) and 
Grammatical Relation and Word (GRW) from Nguyen et al. (2009) emnlp paper.

Added:
    
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java

Added: 
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java?rev=1539040&view=auto
==============================================================================
--- 
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
 (added)
+++ 
ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
 Tue Nov  5 15:57:22 2013
@@ -0,0 +1,331 @@
+package org.apache.ctakes.dependency.parser.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.log4j.Logger;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class AnnotationDepUtils {
+  private static Logger logger = Logger.getLogger(AnnotationDepUtils.class);
+
+  public static String getTokenRelTreeString(JCas jCas, 
List<ConllDependencyNode> nodes, Annotation[] annotations, String[] labels){
+    return getTokenRelTreeString(jCas, nodes, annotations, labels, false);
+  }
+  
+  public static String getTokenRelTreeString(JCas jCas, 
List<ConllDependencyNode> nodes, Annotation[] annotations, String[] labels, 
boolean getParent){
+    Map<ConllDependencyNode, SimpleTree> node2tree = new 
HashMap<ConllDependencyNode, SimpleTree>();
+    ConllDependencyNode topNode = null;
+    
+    // create a SimpleTree object that corresponds to this dependency tree, 
where the
+    // root is the head of the sentence and the children are all the words 
such that the parent
+    // is their head. In this case every word is represented by its 
relationship as well as
+    // its word
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+        topNode = node;
+        continue;
+        // do absolutely nothing with this -- it covers the whole sentence and 
has no useful info
+//        continue;
+//      }else if(node.getHead().getHead() == null){
+//        topNode = node;
+      }
+           
+      SimpleTree curTree = null;
+      SimpleTree headTree = null;
+      if(!node2tree.containsKey(node)){
+        curTree = SimpleTree.fromString(String.format("(%s %s)", 
node.getDeprel(), node.getCoveredText()));
+        node2tree.put(node, curTree);
+      }else{
+        curTree = node2tree.get(node);
+      }
+
+
+      if(curTree.parent == null && node.getHead() != null){
+        if(node2tree.containsKey(node.getHead())){
+          headTree = node2tree.get(node.getHead());
+        }else{
+          String token = node.getHead().getHead() == null ? "TOP" : 
node.getHead().getCoveredText();
+          headTree = SimpleTree.fromString(String.format("(%s %s)", 
node.getHead().getDeprel(), SimpleTree.escapeCat(token)));
+          node2tree.put(node.getHead(), headTree);
+        }
+
+        curTree.parent = headTree.children.get(0);
+        headTree.children.get(0).addChild(curTree);
+      }
+    }
+    
+    ConllDependencyNode highestHead = null;
+    ConllDependencyNode leftmostHead = null;
+    ConllDependencyNode rightmostHead = null;
+    List<SimpleTree> annotationNodes = Lists.newArrayList();
+    
+    // take the set of input annotations and the corresponding labels and 
insert them into the SimpleTree
+    for(int i = 0; i < annotations.length; i++){
+      // get the node representing the head of this annotation
+      List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, 
ConllDependencyNode.class, annotations[i]);
+      if(coveredNodes == null || coveredNodes.size() == 0) continue;
+      ConllDependencyNode headNode = 
DependencyUtility.getNominalHeadNode(coveredNodes);
+      
+      // is this the highest node of all the annotations we're looking at?
+      if(highestHead == null || (distanceFromRoot(headNode) < 
distanceFromRoot(highestHead))){
+        highestHead = headNode;
+      }
+      if(leftmostHead == null || headNode.getBegin() < 
leftmostHead.getBegin()){
+        leftmostHead = headNode;
+      }
+      if(rightmostHead == null || headNode.getEnd() > rightmostHead.getEnd()){
+        rightmostHead = headNode;
+      }
+      
+      SimpleTree insertionPoint = node2tree.get(headNode);
+      SimpleTree insertingTree = new SimpleTree(insertionPoint.cat);
+      insertionPoint.cat = labels[i];
+      insertingTree.children = insertionPoint.children;
+      insertingTree.children.get(0).parent = insertingTree;
+      insertionPoint.children = new ArrayList<SimpleTree>();
+      insertionPoint.addChild(insertingTree);
+      insertingTree.parent = insertionPoint;
+      annotationNodes.add(insertionPoint);
+    }
+    if(highestHead == null) return null;
+    
+    SimpleTree root = node2tree.get(topNode);
+    SimpleTree leftmostNode = getLeftmostNode(root, Sets.newHashSet(labels));
+    SimpleTree rightmostNode = getRightmostNode(root, Sets.newHashSet(labels));
+    SimpleTree pet = getPathEnclosedTree(root, annotationNodes, leftmostNode, 
rightmostNode);
+    if(getParent && pet.parent != null) pet = pet.parent;
+    
+    String treeStr = pet.toString();
+    treeStr = treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase();
+    
+    return treeStr;
+  }
+  
+  private static SimpleTree getRightmostNode(SimpleTree root, Set<String> 
labels) {
+    SimpleTree node = null;
+    
+    for(int i = root.children.size()-1; i >= 0; i--){
+      if(labels.contains(root.children.get(i).cat)){
+        node = root.children.get(i);
+        break;
+      }
+      node = getRightmostNode(root.children.get(i), labels);
+      if(node != null) break;
+    }
+    
+    return node;
+  }
+
+  private static SimpleTree getLeftmostNode(SimpleTree root, Set<String> 
labels) {
+    SimpleTree node = null;
+    
+    for(int i = 0; i < root.children.size(); i++){
+      if(labels.contains(root.children.get(i).cat)){
+        node = root.children.get(i);
+        break;
+      }
+      node = getLeftmostNode(root.children.get(i), labels);
+      if(node != null) break;
+    }
+    
+    return node;
+  }
+  
+  public static String getTokenTreeString(JCas jCas, List<ConllDependencyNode> 
nodes, Annotation[] annotations, String[] labels, boolean getParent){
+    Map<ConllDependencyNode, SimpleTree> node2tree = getNodeTreeMap(nodes);
+    ConllDependencyNode topNode = getTopNode(nodes);
+    
+    ConllDependencyNode highestHead = null;
+    ConllDependencyNode leftmostHead = null;
+    ConllDependencyNode rightmostHead = null;
+    List<SimpleTree> annotationNodes = Lists.newArrayList();
+    // take the set of input annotations and the corresponding labels and 
insert them into the SimpleTree
+    for(int i = 0; i < annotations.length; i++){
+      // get the node representing the head of this annotation
+      List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, 
ConllDependencyNode.class, annotations[i]);
+      if(coveredNodes == null || coveredNodes.size() == 0) continue;
+      ConllDependencyNode headNode = 
DependencyUtility.getNominalHeadNode(coveredNodes);
+      
+      // is this the highest node of all the annotations we're looking at?
+      if(highestHead == null || (distanceFromRoot(headNode) < 
distanceFromRoot(highestHead))){
+        highestHead = headNode;
+      }
+      if(leftmostHead == null || headNode.getBegin() < 
leftmostHead.getBegin()){
+        leftmostHead = headNode;
+      }
+      if(rightmostHead == null || headNode.getEnd() > rightmostHead.getEnd()){
+        rightmostHead = headNode;
+      }
+      
+      SimpleTree insertionPoint = node2tree.get(headNode);
+      SimpleTree insertingTree = new SimpleTree(insertionPoint.cat);
+      insertionPoint.cat = labels[i];
+      insertingTree.children = insertionPoint.children;
+      insertingTree.children.get(0).parent = insertingTree;
+      insertionPoint.children = new ArrayList<SimpleTree>();
+      insertionPoint.addChild(insertingTree);
+      insertingTree.parent = insertionPoint;
+      annotationNodes.add(insertionPoint);
+    }
+    if(highestHead == null) return null;
+    
+    SimpleTree root = node2tree.get(topNode);
+    SimpleTree leftmostNode = getLeftmostNode(root, Sets.newHashSet(labels));
+    SimpleTree rightmostNode = getRightmostNode(root, Sets.newHashSet(labels));
+    SimpleTree pet = getPathEnclosedTree(root, annotationNodes, leftmostNode, 
rightmostNode);
+    if(getParent && pet.parent != null) pet = pet.parent;
+    
+    String treeStr = pet.toString();
+    treeStr = treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase();
+    
+    return treeStr;
+
+  }
+  
+  public static SimpleTree getPathEnclosedTree(SimpleTree root, 
List<SimpleTree> annotationNodes, SimpleTree leftmostTree, SimpleTree 
rightmostTree){
+    SimpleTree pet = null;
+    // for the general case (>= 1 annotations) we need to first find the 
common set of ancestors
+    Set<SimpleTree> commonAncestors = getAncestors(annotationNodes.get(0));
+    for(int i = 1; i < annotationNodes.size(); i++){
+      Set<SimpleTree> nodeAncestors = getAncestors(annotationNodes.get(i));
+      commonAncestors = Sets.intersection(commonAncestors, nodeAncestors);
+    }
+    // of the common set, which is the lowest?
+    SimpleTree lowestAncestor = null;
+    for(SimpleTree ancestor : commonAncestors){
+      if(lowestAncestor == null || distanceFromRoot(ancestor) > 
distanceFromRoot(lowestAncestor)){
+        lowestAncestor = ancestor;
+      }
+    }
+    // of the children of the lowest ancestors, which do not contain any of 
the annotations we are
+    // interested in?
+    root = lowestAncestor;
+    SimpleTree curNode = leftmostTree;
+    SimpleTree lastNode = null;
+    while(curNode != root){
+      lastNode = curNode;
+      if(curNode == null || curNode.parent == null){
+        logger.error("Something weird.");
+      }
+      curNode = curNode.parent;
+      if(curNode == null || curNode.children == null){
+        logger.error("Something is null on the left side of the tree in PET!");
+        break;
+      }
+      while(curNode.children.get(0) != lastNode){
+        curNode.children.remove(0);
+      }
+    }
+    
+    curNode = rightmostTree;
+    lastNode = null;
+    while(curNode != root){
+      lastNode = curNode;
+      curNode = curNode.parent;
+      if(curNode == null){
+        logger.error("Something is null on the right side of the tree in 
PET!");
+        break;
+      }
+      if(curNode.children == null || curNode.children.size() == 0){
+        System.err.println("Help");
+      }
+      while(curNode.children.get(curNode.children.size()-1) != lastNode){
+        curNode.children.remove(curNode.children.size()-1);
+        if(curNode.children == null || curNode.children.size() == 0){
+          System.err.println("Help");
+        }
+      }
+    }
+    pet = lowestAncestor;
+    return pet;
+  }
+  
+  private static ConllDependencyNode getTopNode(List<ConllDependencyNode> 
nodes){
+    ConllDependencyNode topNode = null;
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+        topNode = node;
+        break;
+      }
+    }
+    
+    return topNode;
+  }
+  
+  private static Map<ConllDependencyNode, SimpleTree> 
getNodeTreeMap(List<ConllDependencyNode> nodes){
+    Map<ConllDependencyNode, SimpleTree> node2tree = Maps.newHashMap();
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+//        topNode = node;
+        continue;
+        // do absolutely nothing with this -- it covers the whole sentence and 
has no useful info
+//        continue;
+//      }else if(node.getHead().getHead() == null){
+//        topNode = node;
+      }
+           
+      SimpleTree curTree = null;
+      SimpleTree headTree = null;
+      if(!node2tree.containsKey(node)){
+        curTree = SimpleTree.fromString(String.format("(%s %s)", 
node.getDeprel(), node.getCoveredText()));
+        node2tree.put(node, curTree);
+      }else{
+        curTree = node2tree.get(node);
+      }
+
+
+      if(curTree.parent == null && node.getHead() != null){
+        if(node2tree.containsKey(node.getHead())){
+          headTree = node2tree.get(node.getHead());
+        }else{
+          String token = node.getHead().getHead() == null ? "TOP" : 
node.getHead().getCoveredText();
+          headTree = SimpleTree.fromString(String.format("(%s %s)", 
node.getHead().getDeprel(), SimpleTree.escapeCat(token)));
+          node2tree.put(node.getHead(), headTree);
+        }
+
+        curTree.parent = headTree.children.get(0);
+        headTree.children.get(0).addChild(curTree);
+      }
+    }
+    return node2tree;
+  }
+  
+  private static Set<SimpleTree> getAncestors(SimpleTree tree){
+    Set<SimpleTree> ancestors = Sets.newHashSet();
+    while(tree != null){
+      ancestors.add(tree);
+      tree = tree.parent;
+    }
+    return ancestors;
+  }
+  
+  public static int distanceFromRoot(ConllDependencyNode node){
+    int dist = 0;
+    while(node.getHead() != null){
+      dist++;
+      node = node.getHead();
+    }
+    return dist;
+  }
+  
+  public static int distanceFromRoot(SimpleTree tree){
+    int dist = 0;
+    while(tree.parent != null){
+      dist++;
+      tree = tree.parent;
+    }
+    return dist;
+  }
+}


Reply via email to