Author: rfrovarp
Date: Wed Nov 23 22:22:51 2011
New Revision: 1205637

URL: http://svn.apache.org/viewvc?rev=1205637&view=rev
Log:
Uses the Tika metadata to check whether a meta robots tag is present 
indicating that a page shouldn't be followed or indexed.

Modified:
    
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
    
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java

Modified: 
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1205637&r1=1205636&r2=1205637&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java Wed Nov 23 22:22:51 2011
@@ -12,4 +12,8 @@ public interface TikaParse extends Parse
   public String getXml();
   
   public String getPlainText();
+  
+  public boolean isFollowed();
+  
+  public boolean isIndexed();
 }

Modified: 
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1205637&r1=1205636&r2=1205637&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java Wed Nov 23 22:22:51 2011
@@ -50,4 +50,20 @@ public class TikaParseImpl extends Parse
     return plainText;
   }
 
+  @Override
+  public boolean isFollowed() {
+    if(metadata.get("robots") != null && metadata.get("robots").toLowerCase().contains("nofollow")) {
+      return false;
+    }
+    return true;
+  }
+
+  @Override
+  public boolean isIndexed() {
+    if(metadata.get("robots") != null && metadata.get("robots").toLowerCase().contains("noindex")) {
+      return false;
+    }
+    return true;
+  }
+
 }


Reply via email to