Modified: nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/storage/WebPage.java Fri Jan  9 06:34:33 2015
@@ -1,57 +1,46 @@
 /*******************************************************************************
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-******************************************************************************/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 /**
  * Autogenerated by Avro
  * 
  * DO NOT EDIT DIRECTLY
  */
-package org.apache.nutch.storage;  
+package org.apache.nutch.storage;
+
 @SuppressWarnings("all")
 /** WebPage is the primary data structure in Nutch representing crawl data for a given WebPage at some point in time */
-public class WebPage extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
-  public static final org.apache.avro.Schema SCHEMA$ = new 
org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage
 is the primary data structure in Nutch representing crawl data for a given 
WebPage at some point in 
time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 original associated with this 
WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A 
crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - 
WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, 
STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily 
redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to 
other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. 
transient errors and STATUS_NOTMODIFIED - fetching successful - page is not 
modified\",\"default\":0},{\"name\"
 :\"fetchTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for 
when the page was 
fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The
 system time in milliseconds for when the page was last fetched if it was 
previously fetched which can be used to calculate time delta within a fetching 
schedule 
implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The
 default number of seconds between re-fetches of a page. The default is 
considered as 30 days unless a custom fetch schedle is 
implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The
 number of retried attempts at fetching the WebPage since it was last 
successfully 
fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The
 system time in milliseconds for when this WebPage was modified by the WebPage 
author, if this is not available we default to the server for this information. 
This is important 
 to understand the changing nature of the 
WebPage.\",\"default\":0},{\"name\":\"prevModifiedTime\",\"type\":\"long\",\"doc\":\"The
 system time in milliseconds for when this WebPage was previously modified by 
the author, if this is not available then we default to the server for this 
information. This is important to understand the changing nature of a 
WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A
 nested container representing data captured from web server 
responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A 
protocol response code which can be one of SUCCESS - content was retrieved 
without errors, FAILED - Content was not retrieved. Any further errors may be 
indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application 
may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved 
permanently. New url should be found in args, TEMP_MOVED - Resour
 ce has moved temporarily. New url should be found in args., NOTFOUND - 
Resource was not found, RETRY - Temporary failure. Application may retry 
immediately., EXCEPTION - Unspecified exception occured. Further information 
may be provided in args., ACCESS_DENIED - Access denied - authorization 
required, but missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt 
rules., REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., 
NOTMODIFIED - Unchanged since the last fetch., WOULDBLOCK - Request was refused 
by protocol plugins, because it would block. The expected number of 
milliseconds to wait before retry may be provided in args., BLOCKED - Thread 
was blocked http.max.delays times during 
fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional
 arguments supplied to compliment and/or justify the response 
code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A
 server reponse indicating w
 hen this page was last modified, this can be unreliable at times hence this is 
used as a default fall back value for the preferred 'modifiedTime' and 
'preModifiedTime' obtained from the WebPage 
itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The
 entire raw document content e.g. raw 
XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 type of the content contained within the document itself. ContentType is an 
alias for MimeType. Historically, this parameter was only called MimeType, but 
since this is actually the value included in the HTTP Content-Type header, it 
can also include the character set encoding, which makes it more than just a 
MimeType specification. If MimeType is specified e.g. not None, that value is 
used. Otherwise, ContentType is used. If neither is given, the 
DEFAULT_CONTENT_TYPE setting is 
used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\
 "bytes\"],\"doc\":\"An implementation of a WebPage's previous signature from 
which it can be identified and referenced at any point in time. This can be 
used to uniquely identify WebPage deltas based on page 
fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An
 implementation of a WebPage's signature from which it can be identified and 
referenced at any point in time. This is essentially the WebPage's fingerprint 
represnting its state for any point in 
time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 title of the 
WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 textual content of the WebPage devoid from native 
markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A
 nested container representing parse status data captured from invocation of 
parsers on fetch of a WebPage\",\"fields
 \":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major parsing status' 
including NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), 
FAILED (General failure. There may be a more specific error message in 
arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor
 parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or 
issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to 
another URL. The target URL can be retrieved from the arguments., 
FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved 
from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, 
but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - 
Parsing failed. Invalid format e.g. the content may be corrupted or of wrong 
type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the 
content are needed to complete parsing. The list of URLs to missing parts may be
  provided in arguments. The Fetcher may decide to fetch these parts at once, 
then put them into Content.metadata, and supply them for re-parsing., 
FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - 
probably caused by errors at protocol 
stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional
 arguments supplied to compliment and/or justify the parse status 
code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A
 score used to determine a WebPage's relevance within the web graph it is part 
of. This score may change over time based on graph 
characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In
 the case where we are given two urls, a source and a destination of a 
redirect, we should determine and persist the representative url. The logic 
used to determine this is based largely on Yahoo!'s Slurp 
Crawler\",\"default\":null},{
 
\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Header
 information returned from the web server used to server the content which is 
subsequently fetched from. This includes keys such as TRANSFER_ENCODING, 
CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, 
CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and 
LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded
 hyperlinks which direct outside of the current 
domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded
 hyperlinks which link to pages within the current 
domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers
 flags which represent user and machine decisions which have affected 
influenced a WebPage's current state. Markers can be system 
 specific and user machine driven in nature. They are assigned to a WebPage on 
a job-by-job basis and thier values indicative of what actions should be 
associated with a 
WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A
 multi-valued metadata container used for storing everything from structured 
WebPage characterists, to ad-hoc extraction and metadata augmentation for any 
given 
WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A
 batchId that this WebPage is assigned to. WebPage's are fetched in batches, 
called fetchlists. Pages are partitioned but can always be associated and 
fetched alongside pages of similar value (within a crawl cycle) based on 
batchId.\",\"default\":null}]}");
+public class WebPage extends org.apache.gora.persistency.impl.PersistentBase
+    implements org.apache.avro.specific.SpecificRecord,
+    org.apache.gora.persistency.Persistent {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser()
+      
.parse("{\"type\":\"record\",\"name\":\"WebPage\",\"namespace\":\"org.apache.nutch.storage\",\"doc\":\"WebPage
 is the primary data structure in Nutch representing crawl data for a given 
WebPage at some point in 
time\",\"fields\":[{\"name\":\"baseUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 original associated with this 
WebPage.\",\"default\":null},{\"name\":\"status\",\"type\":\"int\",\"doc\":\"A 
crawl status associated with the WebPage, can be of value STATUS_UNFETCHED - 
WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully fetched, 
STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP - WebPage temporarily 
redirects to other page, STATUS_REDIR_PERM - WebPage permanently redirects to 
other page, STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. 
transient errors and STATUS_NOTMODIFIED - fetching successful - page is not 
modified\",\"default\":0},{\"name\":\"fetchTime\",\"type\":\"long\",\"doc\":\"The
 system time in milliseconds for when 
 the page was 
fetched.\",\"default\":0},{\"name\":\"prevFetchTime\",\"type\":\"long\",\"doc\":\"The
 system time in milliseconds for when the page was last fetched if it was 
previously fetched which can be used to calculate time delta within a fetching 
schedule 
implementation\",\"default\":0},{\"name\":\"fetchInterval\",\"type\":\"int\",\"doc\":\"The
 default number of seconds between re-fetches of a page. The default is 
considered as 30 days unless a custom fetch schedle is 
implemented.\",\"default\":0},{\"name\":\"retriesSinceFetch\",\"type\":\"int\",\"doc\":\"The
 number of retried attempts at fetching the WebPage since it was last 
successfully 
fetched.\",\"default\":0},{\"name\":\"modifiedTime\",\"type\":\"long\",\"doc\":\"The
 system time in milliseconds for when this WebPage was modified by the WebPage 
author, if this is not available we default to the server for this information. 
This is important to understand the changing nature of the 
WebPage.\",\"default\":0},{\"name\":\"prevM
 odifiedTime\",\"type\":\"long\",\"doc\":\"The system time in milliseconds for 
when this WebPage was previously modified by the author, if this is not 
available then we default to the server for this information. This is important 
to understand the changing nature of a 
WebPage.\",\"default\":0},{\"name\":\"protocolStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ProtocolStatus\",\"doc\":\"A
 nested container representing data captured from web server 
responses.\",\"fields\":[{\"name\":\"code\",\"type\":\"int\",\"doc\":\"A 
protocol response code which can be one of SUCCESS - content was retrieved 
without errors, FAILED - Content was not retrieved. Any further errors may be 
indicated in args, PROTO_NOT_FOUND - This protocol was not found. Application 
may attempt to retry later, GONE - Resource is gone, MOVED - Resource has moved 
permanently. New url should be found in args, TEMP_MOVED - Resource has moved 
temporarily. New url should be found in args., NOTFOUND - Resource was 
 not found, RETRY - Temporary failure. Application may retry immediately., 
EXCEPTION - Unspecified exception occured. Further information may be provided 
in args., ACCESS_DENIED - Access denied - authorization required, but 
missing/incorrect., ROBOTS_DENIED - Access denied by robots.txt rules., 
REDIR_EXCEEDED - Too many redirects., NOTFETCHING - Not fetching., NOTMODIFIED 
- Unchanged since the last fetch., WOULDBLOCK - Request was refused by protocol 
plugins, because it would block. The expected number of milliseconds to wait 
before retry may be provided in args., BLOCKED - Thread was blocked 
http.max.delays times during 
fetching.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional
 arguments supplied to compliment and/or justify the response 
code.\",\"default\":[]},{\"name\":\"lastModified\",\"type\":\"long\",\"doc\":\"A
 server reponse indicating when this page was last modified, this can be 
unreliable at times hence this is used 
 as a default fall back value for the preferred 'modifiedTime' and 
'preModifiedTime' obtained from the WebPage 
itself.\",\"default\":0}]}],\"default\":null},{\"name\":\"content\",\"type\":[\"null\",\"bytes\"],\"doc\":\"The
 entire raw document content e.g. raw 
XHTML\",\"default\":null},{\"name\":\"contentType\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 type of the content contained within the document itself. ContentType is an 
alias for MimeType. Historically, this parameter was only called MimeType, but 
since this is actually the value included in the HTTP Content-Type header, it 
can also include the character set encoding, which makes it more than just a 
MimeType specification. If MimeType is specified e.g. not None, that value is 
used. Otherwise, ContentType is used. If neither is given, the 
DEFAULT_CONTENT_TYPE setting is 
used.\",\"default\":null},{\"name\":\"prevSignature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An
 implementation of a WebPage's previous signature from which i
 t can be identified and referenced at any point in time. This can be used to 
uniquely identify WebPage deltas based on page 
fingerprints.\",\"default\":null},{\"name\":\"signature\",\"type\":[\"null\",\"bytes\"],\"doc\":\"An
 implementation of a WebPage's signature from which it can be identified and 
referenced at any point in time. This is essentially the WebPage's fingerprint 
represnting its state for any point in 
time.\",\"default\":null},{\"name\":\"title\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 title of the 
WebPage.\",\"default\":null},{\"name\":\"text\",\"type\":[\"null\",\"string\"],\"doc\":\"The
 textual content of the WebPage devoid from native 
markup.\",\"default\":null},{\"name\":\"parseStatus\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"ParseStatus\",\"doc\":\"A
 nested container representing parse status data captured from invocation of 
parsers on fetch of a 
WebPage\",\"fields\":[{\"name\":\"majorCode\",\"type\":\"int\",\"doc\":\"Major 
parsing status' includi
 ng NOTPARSED (Parsing was not performed), SUCCESS (Parsing succeeded), FAILED 
(General failure. There may be a more specific error message in 
arguments.)\",\"default\":0},{\"name\":\"minorCode\",\"type\":\"int\",\"doc\":\"Minor
 parsing status' including SUCCESS_OK - Successful parse devoid of anomalies or 
issues, SUCCESS_REDIRECT - Parsed content contains a directive to redirect to 
another URL. The target URL can be retrieved from the arguments., 
FAILED_EXCEPTION - Parsing failed. An Exception occured which may be retrieved 
from the arguments., FAILED_TRUNCATED - Parsing failed. Content was truncated, 
but the parser cannot handle incomplete content., FAILED_INVALID_FORMAT - 
Parsing failed. Invalid format e.g. the content may be corrupted or of wrong 
type., FAILED_MISSING_PARTS - Parsing failed. Other related parts of the 
content are needed to complete parsing. The list of URLs to missing parts may 
be provided in arguments. The Fetcher may decide to fetch these parts at once, 
then pu
 t them into Content.metadata, and supply them for re-parsing., 
FAILED_MISING_CONTENT - Parsing failed. There was no content to be parsed - 
probably caused by errors at protocol 
stage.\",\"default\":0},{\"name\":\"args\",\"type\":{\"type\":\"array\",\"items\":\"string\"},\"doc\":\"Optional
 arguments supplied to compliment and/or justify the parse status 
code.\",\"default\":[]}]}],\"default\":null},{\"name\":\"score\",\"type\":\"float\",\"doc\":\"A
 score used to determine a WebPage's relevance within the web graph it is part 
of. This score may change over time based on graph 
characteristics.\",\"default\":0},{\"name\":\"reprUrl\",\"type\":[\"null\",\"string\"],\"doc\":\"In
 the case where we are given two urls, a source and a destination of a 
redirect, we should determine and persist the representative url. The logic 
used to determine this is based largely on Yahoo!'s Slurp 
Crawler\",\"default\":null},{\"name\":\"headers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"
 doc\":\"Header information returned from the web server used to server the 
content which is subsequently fetched from. This includes keys such as 
TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, 
CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED 
and 
LOCATION.\",\"default\":{}},{\"name\":\"outlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded
 hyperlinks which direct outside of the current 
domain.\",\"default\":{}},{\"name\":\"inlinks\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Embedded
 hyperlinks which link to pages within the current 
domain.\",\"default\":{}},{\"name\":\"markers\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"string\"]},\"doc\":\"Markers
 flags which represent user and machine decisions which have affected 
influenced a WebPage's current state. Markers can be system specific and user 
machine driven in nature. They are assigned to a WebPage on a job-
 by-job basis and thier values indicative of what actions should be associated 
with a 
WebPage.\",\"default\":{}},{\"name\":\"metadata\",\"type\":{\"type\":\"map\",\"values\":[\"null\",\"bytes\"]},\"doc\":\"A
 multi-valued metadata container used for storing everything from structured 
WebPage characterists, to ad-hoc extraction and metadata augmentation for any 
given 
WebPage.\",\"default\":{}},{\"name\":\"batchId\",\"type\":[\"null\",\"string\"],\"doc\":\"A
 batchId that this WebPage is assigned to. WebPage's are fetched in batches, 
called fetchlists. Pages are partitioned but can always be associated and 
fetched alongside pages of similar value (within a crawl cycle) based on 
batchId.\",\"default\":null}]}");
 
   /** Enum containing all data bean's fields. */
   public static enum Field {
-    BASE_URL(0, "baseUrl"),
-    STATUS(1, "status"),
-    FETCH_TIME(2, "fetchTime"),
-    PREV_FETCH_TIME(3, "prevFetchTime"),
-    FETCH_INTERVAL(4, "fetchInterval"),
-    RETRIES_SINCE_FETCH(5, "retriesSinceFetch"),
-    MODIFIED_TIME(6, "modifiedTime"),
-    PREV_MODIFIED_TIME(7, "prevModifiedTime"),
-    PROTOCOL_STATUS(8, "protocolStatus"),
-    CONTENT(9, "content"),
-    CONTENT_TYPE(10, "contentType"),
-    PREV_SIGNATURE(11, "prevSignature"),
-    SIGNATURE(12, "signature"),
-    TITLE(13, "title"),
-    TEXT(14, "text"),
-    PARSE_STATUS(15, "parseStatus"),
-    SCORE(16, "score"),
-    REPR_URL(17, "reprUrl"),
-    HEADERS(18, "headers"),
-    OUTLINKS(19, "outlinks"),
-    INLINKS(20, "inlinks"),
-    MARKERS(21, "markers"),
-    METADATA(22, "metadata"),
-    BATCH_ID(23, "batchId"),
-    ;
+    BASE_URL(0, "baseUrl"), STATUS(1, "status"), FETCH_TIME(2, "fetchTime"), PREV_FETCH_TIME(
+        3, "prevFetchTime"), FETCH_INTERVAL(4, "fetchInterval"), RETRIES_SINCE_FETCH(
+        5, "retriesSinceFetch"), MODIFIED_TIME(6, "modifiedTime"), PREV_MODIFIED_TIME(
+        7, "prevModifiedTime"), PROTOCOL_STATUS(8, "protocolStatus"), CONTENT(
+        9, "content"), CONTENT_TYPE(10, "contentType"), PREV_SIGNATURE(11,
+        "prevSignature"), SIGNATURE(12, "signature"), TITLE(13, "title"), TEXT(
+        14, "text"), PARSE_STATUS(15, "parseStatus"), SCORE(16, "score"), REPR_URL(
+        17, "reprUrl"), HEADERS(18, "headers"), OUTLINKS(19, "outlinks"), INLINKS(
+        20, "inlinks"), MARKERS(21, "markers"), METADATA(22, "metadata"), BATCH_ID(
+        23, "batchId"), ;
     /**
      * Field's index.
      */
@@ -64,59 +53,55 @@ public class WebPage extends org.apache.
 
     /**
      * Field's constructor
-     * @param index field's index.
-     * @param name field's name.
+     * 
+     * @param index
+     *          field's index.
+     * @param name
+     *          field's name.
      */
-    Field(int index, String name) {this.index=index;this.name=name;}
+    Field(int index, String name) {
+      this.index = index;
+      this.name = name;
+    }
 
     /**
      * Gets field's index.
+     * 
      * @return int field's index.
      */
-    public int getIndex() {return index;}
+    public int getIndex() {
+      return index;
+    }
 
     /**
      * Gets field's name.
+     * 
      * @return String field's name.
      */
-    public String getName() {return name;}
+    public String getName() {
+      return name;
+    }
 
     /**
      * Gets field's attributes to string.
+     * 
      * @return String field's attributes to string.
      */
-    public String toString() {return name;}
+    public String toString() {
+      return name;
+    }
   };
 
-  public static final String[] _ALL_FIELDS = {
-  "baseUrl",
-  "status",
-  "fetchTime",
-  "prevFetchTime",
-  "fetchInterval",
-  "retriesSinceFetch",
-  "modifiedTime",
-  "prevModifiedTime",
-  "protocolStatus",
-  "content",
-  "contentType",
-  "prevSignature",
-  "signature",
-  "title",
-  "text",
-  "parseStatus",
-  "score",
-  "reprUrl",
-  "headers",
-  "outlinks",
-  "inlinks",
-  "markers",
-  "metadata",
-  "batchId",
-  };
+  public static final String[] _ALL_FIELDS = { "baseUrl", "status",
+      "fetchTime", "prevFetchTime", "fetchInterval", "retriesSinceFetch",
+      "modifiedTime", "prevModifiedTime", "protocolStatus", "content",
+      "contentType", "prevSignature", "signature", "title", "text",
+      "parseStatus", "score", "reprUrl", "headers", "outlinks", "inlinks",
+      "markers", "metadata", "batchId", };
 
   /**
    * Gets the total field count.
+   * 
    * @return int field count
    */
   public int getFieldsCount() {
@@ -125,301 +110,524 @@ public class WebPage extends org.apache.
 
   /** The original associated with this WebPage. */
   private java.lang.CharSequence baseUrl;
-  /** A crawl status associated with the WebPage, can be of value 
STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was 
successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP 
- WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage 
permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, 
needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching 
successful - page is not modified */
+  /**
+   * A crawl status associated with the WebPage, can be of value
+   * STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage
+   * was successfully fetched, STATUS_GONE - WebPage no longer exists,
+   * STATUS_REDIR_TEMP - WebPage temporarily redirects to other page,
+   * STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   */
   private int status;
   /** The system time in milliseconds for when the page was fetched. */
   private long fetchTime;
-  /** The system time in milliseconds for when the page was last fetched if it 
was previously fetched which can be used to calculate time delta within a 
fetching schedule implementation */
+  /**
+   * The system time in milliseconds for when the page was last fetched if it
+   * was previously fetched which can be used to calculate time delta within a
+   * fetching schedule implementation
+   */
   private long prevFetchTime;
-  /** The default number of seconds between re-fetches of a page. The default 
is considered as 30 days unless a custom fetch schedle is implemented. */
+  /**
+   * The default number of seconds between re-fetches of a page. The default is
+   * considered as 30 days unless a custom fetch schedle is implemented.
+   */
   private int fetchInterval;
-  /** The number of retried attempts at fetching the WebPage since it was last 
successfully fetched. */
+  /**
+   * The number of retried attempts at fetching the WebPage since it was last
+   * successfully fetched.
+   */
   private int retriesSinceFetch;
-  /** The system time in milliseconds for when this WebPage was modified by 
the WebPage author, if this is not available we default to the server for this 
information. This is important to understand the changing nature of the 
WebPage. */
+  /**
+   * The system time in milliseconds for when this WebPage was modified by the
+   * WebPage author, if this is not available we default to the server for this
+   * information. This is important to understand the changing nature of the
+   * WebPage.
+   */
   private long modifiedTime;
-  /** The system time in milliseconds for when this WebPage was previously 
modified by the author, if this is not available then we default to the server 
for this information. This is important to understand the changing nature of a 
WebPage. */
+  /**
+   * The system time in milliseconds for when this WebPage was previously
+   * modified by the author, if this is not available then we default to the
+   * server for this information. This is important to understand the changing
+   * nature of a WebPage.
+   */
   private long prevModifiedTime;
   private org.apache.nutch.storage.ProtocolStatus protocolStatus;
   /** The entire raw document content e.g. raw XHTML */
   private java.nio.ByteBuffer content;
-  /** The type of the content contained within the document itself. 
ContentType is an alias for MimeType. Historically, this parameter was only 
called MimeType, but since this is actually the value included in the HTTP 
Content-Type header, it can also include the character set encoding, which 
makes it more than just a MimeType specification. If MimeType is specified e.g. 
not None, that value is used. Otherwise, ContentType is used. If neither is 
given, the DEFAULT_CONTENT_TYPE setting is used. */
+  /**
+   * The type of the content contained within the document itself. ContentType
+   * is an alias for MimeType. Historically, this parameter was only called
+   * MimeType, but since this is actually the value included in the HTTP
+   * Content-Type header, it can also include the character set encoding, which
+   * makes it more than just a MimeType specification. If MimeType is specified
+   * e.g. not None, that value is used. Otherwise, ContentType is used. If
+   * neither is given, the DEFAULT_CONTENT_TYPE setting is used.
+   */
   private java.lang.CharSequence contentType;
-  /** An implementation of a WebPage's previous signature from which it can be 
identified and referenced at any point in time. This can be used to uniquely 
identify WebPage deltas based on page fingerprints. */
+  /**
+   * An implementation of a WebPage's previous signature from which it can be
+   * identified and referenced at any point in time. This can be used to
+   * uniquely identify WebPage deltas based on page fingerprints.
+   */
   private java.nio.ByteBuffer prevSignature;
-  /** An implementation of a WebPage's signature from which it can be 
identified and referenced at any point in time. This is essentially the 
WebPage's fingerprint represnting its state for any point in time. */
+  /**
+   * An implementation of a WebPage's signature from which it can be identified
+   * and referenced at any point in time. This is essentially the WebPage's
+   * fingerprint represnting its state for any point in time.
+   */
   private java.nio.ByteBuffer signature;
   /** The title of the WebPage. */
   private java.lang.CharSequence title;
   /** The textual content of the WebPage devoid from native markup. */
   private java.lang.CharSequence text;
   private org.apache.nutch.storage.ParseStatus parseStatus;
-  /** A score used to determine a WebPage's relevance within the web graph it 
is part of. This score may change over time based on graph characteristics. */
+  /**
+   * A score used to determine a WebPage's relevance within the web graph it is
+   * part of. This score may change over time based on graph characteristics.
+   */
   private float score;
-  /** In the case where we are given two urls, a source and a destination of a 
redirect, we should determine and persist the representative url. The logic 
used to determine this is based largely on Yahoo!'s Slurp Crawler */
+  /**
+   * In the case where we are given two urls, a source and a destination of a
+   * redirect, we should determine and persist the representative url. The logic
+   * used to determine this is based largely on Yahoo!'s Slurp Crawler
+   */
   private java.lang.CharSequence reprUrl;
-  /** Header information returned from the web server used to server the 
content which is subsequently fetched from. This includes keys such as 
TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, 
CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED 
and LOCATION. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> headers;
+  /**
+   * Header information returned from the web server used to server the content
+   * which is subsequently fetched from. This includes keys such as
+   * TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+   * CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE,
+   * LAST_MODIFIED and LOCATION.
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> headers;
   /** Embedded hyperlinks which direct outside of the current domain. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
outlinks;
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> outlinks;
   /** Embedded hyperlinks which link to pages within the current domain. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> inlinks;
-  /** Markers flags which represent user and machine decisions which have 
affected influenced a WebPage's current state. Markers can be system specific 
and user machine driven in nature. They are assigned to a WebPage on a 
job-by-job basis and thier values indicative of what actions should be 
associated with a WebPage. */
-  private java.util.Map<java.lang.CharSequence,java.lang.CharSequence> markers;
-  /** A multi-valued metadata container used for storing everything from 
structured WebPage characterists, to ad-hoc extraction and metadata 
augmentation for any given WebPage. */
-  private java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer> metadata;
-  /** A batchId that this WebPage is assigned to. WebPage's are fetched in 
batches, called fetchlists. Pages are partitioned but can always be associated 
and fetched alongside pages of similar value (within a crawl cycle) based on 
batchId. */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> inlinks;
+  /**
+   * Markers flags which represent user and machine decisions which have
+   * affected influenced a WebPage's current state. Markers can be system
+   * specific and user machine driven in nature. They are assigned to a WebPage
+   * on a job-by-job basis and thier values indicative of what actions should be
+   * associated with a WebPage.
+   */
+  private java.util.Map<java.lang.CharSequence, java.lang.CharSequence> markers;
+  /**
+   * A multi-valued metadata container used for storing everything from
+   * structured WebPage characterists, to ad-hoc extraction and metadata
+   * augmentation for any given WebPage.
+   */
+  private java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer> metadata;
+  /**
+   * A batchId that this WebPage is assigned to. WebPage's are fetched in
+   * batches, called fetchlists. Pages are partitioned but can always be
+   * associated and fetched alongside pages of similar value (within a crawl
+   * cycle) based on batchId.
+   */
   private java.lang.CharSequence batchId;
-  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
-  // Used by DatumWriter.  Applications should not call. 
+
+  public org.apache.avro.Schema getSchema() {
+    return SCHEMA$;
+  }
+
+  // Used by DatumWriter. Applications should not call.
   public java.lang.Object get(int field$) {
     switch (field$) {
-    case 0: return baseUrl;
-    case 1: return status;
-    case 2: return fetchTime;
-    case 3: return prevFetchTime;
-    case 4: return fetchInterval;
-    case 5: return retriesSinceFetch;
-    case 6: return modifiedTime;
-    case 7: return prevModifiedTime;
-    case 8: return protocolStatus;
-    case 9: return content;
-    case 10: return contentType;
-    case 11: return prevSignature;
-    case 12: return signature;
-    case 13: return title;
-    case 14: return text;
-    case 15: return parseStatus;
-    case 16: return score;
-    case 17: return reprUrl;
-    case 18: return headers;
-    case 19: return outlinks;
-    case 20: return inlinks;
-    case 21: return markers;
-    case 22: return metadata;
-    case 23: return batchId;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
-    }
-  }
-  
-  // Used by DatumReader.  Applications should not call. 
-  @SuppressWarnings(value="unchecked")
+    case 0:
+      return baseUrl;
+    case 1:
+      return status;
+    case 2:
+      return fetchTime;
+    case 3:
+      return prevFetchTime;
+    case 4:
+      return fetchInterval;
+    case 5:
+      return retriesSinceFetch;
+    case 6:
+      return modifiedTime;
+    case 7:
+      return prevModifiedTime;
+    case 8:
+      return protocolStatus;
+    case 9:
+      return content;
+    case 10:
+      return contentType;
+    case 11:
+      return prevSignature;
+    case 12:
+      return signature;
+    case 13:
+      return title;
+    case 14:
+      return text;
+    case 15:
+      return parseStatus;
+    case 16:
+      return score;
+    case 17:
+      return reprUrl;
+    case 18:
+      return headers;
+    case 19:
+      return outlinks;
+    case 20:
+      return inlinks;
+    case 21:
+      return markers;
+    case 22:
+      return metadata;
+    case 23:
+      return batchId;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
+    }
+  }
+
+  // Used by DatumReader. Applications should not call.
+  @SuppressWarnings(value = "unchecked")
   public void put(int field$, java.lang.Object value) {
     switch (field$) {
-    case 0: baseUrl = (java.lang.CharSequence)(value); break;
-    case 1: status = (java.lang.Integer)(value); break;
-    case 2: fetchTime = (java.lang.Long)(value); break;
-    case 3: prevFetchTime = (java.lang.Long)(value); break;
-    case 4: fetchInterval = (java.lang.Integer)(value); break;
-    case 5: retriesSinceFetch = (java.lang.Integer)(value); break;
-    case 6: modifiedTime = (java.lang.Long)(value); break;
-    case 7: prevModifiedTime = (java.lang.Long)(value); break;
-    case 8: protocolStatus = (org.apache.nutch.storage.ProtocolStatus)(value); 
break;
-    case 9: content = (java.nio.ByteBuffer)(value); break;
-    case 10: contentType = (java.lang.CharSequence)(value); break;
-    case 11: prevSignature = (java.nio.ByteBuffer)(value); break;
-    case 12: signature = (java.nio.ByteBuffer)(value); break;
-    case 13: title = (java.lang.CharSequence)(value); break;
-    case 14: text = (java.lang.CharSequence)(value); break;
-    case 15: parseStatus = (org.apache.nutch.storage.ParseStatus)(value); 
break;
-    case 16: score = (java.lang.Float)(value); break;
-    case 17: reprUrl = (java.lang.CharSequence)(value); break;
-    case 18: headers = 
(java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value 
instanceof org.apache.gora.persistency.Dirtyable) ? value : new 
org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 19: outlinks = 
(java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value 
instanceof org.apache.gora.persistency.Dirtyable) ? value : new 
org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 20: inlinks = 
(java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value 
instanceof org.apache.gora.persistency.Dirtyable) ? value : new 
org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 21: markers = 
(java.util.Map<java.lang.CharSequence,java.lang.CharSequence>)((value 
instanceof org.apache.gora.persistency.Dirtyable) ? value : new 
org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 22: metadata = 
(java.util.Map<java.lang.CharSequence,java.nio.ByteBuffer>)((value instanceof 
org.apache.gora.persistency.Dirtyable) ? value : new 
org.apache.gora.persistency.impl.DirtyMapWrapper((java.util.Map)value)); break;
-    case 23: batchId = (java.lang.CharSequence)(value); break;
-    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    case 0:
+      baseUrl = (java.lang.CharSequence) (value);
+      break;
+    case 1:
+      status = (java.lang.Integer) (value);
+      break;
+    case 2:
+      fetchTime = (java.lang.Long) (value);
+      break;
+    case 3:
+      prevFetchTime = (java.lang.Long) (value);
+      break;
+    case 4:
+      fetchInterval = (java.lang.Integer) (value);
+      break;
+    case 5:
+      retriesSinceFetch = (java.lang.Integer) (value);
+      break;
+    case 6:
+      modifiedTime = (java.lang.Long) (value);
+      break;
+    case 7:
+      prevModifiedTime = (java.lang.Long) (value);
+      break;
+    case 8:
+      protocolStatus = (org.apache.nutch.storage.ProtocolStatus) (value);
+      break;
+    case 9:
+      content = (java.nio.ByteBuffer) (value);
+      break;
+    case 10:
+      contentType = (java.lang.CharSequence) (value);
+      break;
+    case 11:
+      prevSignature = (java.nio.ByteBuffer) (value);
+      break;
+    case 12:
+      signature = (java.nio.ByteBuffer) (value);
+      break;
+    case 13:
+      title = (java.lang.CharSequence) (value);
+      break;
+    case 14:
+      text = (java.lang.CharSequence) (value);
+      break;
+    case 15:
+      parseStatus = (org.apache.nutch.storage.ParseStatus) (value);
+      break;
+    case 16:
+      score = (java.lang.Float) (value);
+      break;
+    case 17:
+      reprUrl = (java.lang.CharSequence) (value);
+      break;
+    case 18:
+      headers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 19:
+      outlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 20:
+      inlinks = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 21:
+      markers = (java.util.Map<java.lang.CharSequence, java.lang.CharSequence>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 22:
+      metadata = (java.util.Map<java.lang.CharSequence, java.nio.ByteBuffer>) ((value instanceof org.apache.gora.persistency.Dirtyable) ? value
+          : new org.apache.gora.persistency.impl.DirtyMapWrapper(
+              (java.util.Map) value));
+      break;
+    case 23:
+      batchId = (java.lang.CharSequence) (value);
+      break;
+    default:
+      throw new org.apache.avro.AvroRuntimeException("Bad index");
     }
   }
 
   /**
-   * Gets the value of the 'baseUrl' field.
-   * The original associated with this WebPage.   */
+   * Gets the value of the 'baseUrl' field. The original associated with this
+   * WebPage.
+   */
   public java.lang.CharSequence getBaseUrl() {
     return baseUrl;
   }
 
   /**
-   * Sets the value of the 'baseUrl' field.
-   * The original associated with this WebPage.   * @param value the value to 
set.
+   * Sets the value of the 'baseUrl' field. The original associated with this
+   * WebPage. * @param value the value to set.
    */
   public void setBaseUrl(java.lang.CharSequence value) {
     this.baseUrl = value;
     setDirty(0);
   }
-  
+
   /**
-   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * The original associated with this WebPage.   * @param value the value to 
set.
+   * Checks the dirty status of the 'baseUrl' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * original associated with this WebPage. * @param value the value to set.
    */
   public boolean isBaseUrlDirty(java.lang.CharSequence value) {
     return isDirty(0);
   }
 
   /**
-   * Gets the value of the 'status' field.
-   * A crawl status associated with the WebPage, can be of value 
STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was 
successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP 
- WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage 
permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, 
needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching 
successful - page is not modified   */
+   * Gets the value of the 'status' field. A crawl status associated with the
+   * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+   * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
+   * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
+   * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   */
   public java.lang.Integer getStatus() {
     return status;
   }
 
   /**
-   * Sets the value of the 'status' field.
-   * A crawl status associated with the WebPage, can be of value 
STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was 
successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP 
- WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage 
permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, 
needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching 
successful - page is not modified   * @param value the value to set.
+   * Sets the value of the 'status' field. A crawl status associated with the
+   * WebPage, can be of value STATUS_UNFETCHED - WebPage was not fetched yet,
+   * STATUS_FETCHED - WebPage was successfully fetched, STATUS_GONE - WebPage no
+   * longer exists, STATUS_REDIR_TEMP - WebPage temporarily redirects to other
+   * page, STATUS_REDIR_PERM - WebPage permanently redirects to other page,
+   * STATUS_RETRY - Fetching unsuccessful, needs to be retried e.g. transient
+   * errors and STATUS_NOTMODIFIED - fetching successful - page is not modified
+   * * @param value the value to set.
    */
   public void setStatus(java.lang.Integer value) {
     this.status = value;
     setDirty(1);
   }
-  
+
   /**
-   * Checks the dirty status of the 'status' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * A crawl status associated with the WebPage, can be of value 
STATUS_UNFETCHED - WebPage was not fetched yet, STATUS_FETCHED - WebPage was 
successfully fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP 
- WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage 
permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful, 
needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching 
successful - page is not modified   * @param value the value to set.
+   * Checks the dirty status of the 'status' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A crawl
+   * status associated with the WebPage, can be of value STATUS_UNFETCHED -
+   * WebPage was not fetched yet, STATUS_FETCHED - WebPage was successfully
+   * fetched, STATUS_GONE - WebPage no longer exists, STATUS_REDIR_TEMP -
+   * WebPage temporarily redirects to other page, STATUS_REDIR_PERM - WebPage
+   * permanently redirects to other page, STATUS_RETRY - Fetching unsuccessful,
+   * needs to be retried e.g. transient errors and STATUS_NOTMODIFIED - fetching
+   * successful - page is not modified * @param value the value to set.
    */
   public boolean isStatusDirty(java.lang.Integer value) {
     return isDirty(1);
   }
 
   /**
-   * Gets the value of the 'fetchTime' field.
-   * The system time in milliseconds for when the page was fetched.   */
+   * Gets the value of the 'fetchTime' field. The system time in milliseconds
+   * for when the page was fetched.
+   */
   public java.lang.Long getFetchTime() {
     return fetchTime;
   }
 
   /**
-   * Sets the value of the 'fetchTime' field.
-   * The system time in milliseconds for when the page was fetched.   * @param 
value the value to set.
+   * Sets the value of the 'fetchTime' field. The system time in milliseconds
+   * for when the page was fetched. * @param value the value to set.
    */
   public void setFetchTime(java.lang.Long value) {
     this.fetchTime = value;
     setDirty(2);
   }
-  
+
   /**
-   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when the page was fetched.   * @param 
value the value to set.
+   * Checks the dirty status of the 'fetchTime' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when the page was fetched. * @param value
+   * the value to set.
    */
   public boolean isFetchTimeDirty(java.lang.Long value) {
     return isDirty(2);
   }
 
   /**
-   * Gets the value of the 'prevFetchTime' field.
-   * The system time in milliseconds for when the page was last fetched if it 
was previously fetched which can be used to calculate time delta within a 
fetching schedule implementation   */
+   * Gets the value of the 'prevFetchTime' field. The system time in
+   * milliseconds for when the page was last fetched if it was previously
+   * fetched which can be used to calculate time delta within a fetching
+   * schedule implementation
+   */
   public java.lang.Long getPrevFetchTime() {
     return prevFetchTime;
   }
 
   /**
-   * Sets the value of the 'prevFetchTime' field.
-   * The system time in milliseconds for when the page was last fetched if it 
was previously fetched which can be used to calculate time delta within a 
fetching schedule implementation   * @param value the value to set.
+   * Sets the value of the 'prevFetchTime' field. The system time in
+   * milliseconds for when the page was last fetched if it was previously
+   * fetched which can be used to calculate time delta within a fetching
+   * schedule implementation * @param value the value to set.
    */
   public void setPrevFetchTime(java.lang.Long value) {
     this.prevFetchTime = value;
     setDirty(3);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if 
it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when the page was last fetched if it 
was previously fetched which can be used to calculate time delta within a 
fetching schedule implementation   * @param value the value to set.
+   * Checks the dirty status of the 'prevFetchTime' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when the page was last fetched if it was
+   * previously fetched which can be used to calculate time delta within a
+   * fetching schedule implementation * @param value the value to set.
    */
   public boolean isPrevFetchTimeDirty(java.lang.Long value) {
     return isDirty(3);
   }
 
   /**
-   * Gets the value of the 'fetchInterval' field.
-   * The default number of seconds between re-fetches of a page. The default 
is considered as 30 days unless a custom fetch schedle is implemented.   */
+   * Gets the value of the 'fetchInterval' field. The default number of seconds
+   * between re-fetches of a page. The default is considered as 30 days unless a
+   * custom fetch schedle is implemented.
+   */
   public java.lang.Integer getFetchInterval() {
     return fetchInterval;
   }
 
   /**
-   * Sets the value of the 'fetchInterval' field.
-   * The default number of seconds between re-fetches of a page. The default 
is considered as 30 days unless a custom fetch schedle is implemented.   * 
@param value the value to set.
+   * Sets the value of the 'fetchInterval' field. The default number of seconds
+   * between re-fetches of a page. The default is considered as 30 days unless a
+   * custom fetch schedle is implemented. * @param value the value to set.
    */
   public void setFetchInterval(java.lang.Integer value) {
     this.fetchInterval = value;
     setDirty(4);
   }
-  
+
   /**
-   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if 
it represents a change that has not yet been written to the database.
-   * The default number of seconds between re-fetches of a page. The default 
is considered as 30 days unless a custom fetch schedle is implemented.   * 
@param value the value to set.
+   * Checks the dirty status of the 'fetchInterval' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. The
+   * default number of seconds between re-fetches of a page. The default is
+   * considered as 30 days unless a custom fetch schedle is implemented. * @param
+   * value the value to set.
    */
   public boolean isFetchIntervalDirty(java.lang.Integer value) {
     return isDirty(4);
   }
 
   /**
-   * Gets the value of the 'retriesSinceFetch' field.
-   * The number of retried attempts at fetching the WebPage since it was last 
successfully fetched.   */
+   * Gets the value of the 'retriesSinceFetch' field. The number of retried
+   * attempts at fetching the WebPage since it was last successfully fetched.
+   */
   public java.lang.Integer getRetriesSinceFetch() {
     return retriesSinceFetch;
   }
 
   /**
-   * Sets the value of the 'retriesSinceFetch' field.
-   * The number of retried attempts at fetching the WebPage since it was last 
successfully fetched.   * @param value the value to set.
+   * Sets the value of the 'retriesSinceFetch' field. The number of retried
+   * attempts at fetching the WebPage since it was last successfully fetched. * @param
+   * value the value to set.
    */
   public void setRetriesSinceFetch(java.lang.Integer value) {
     this.retriesSinceFetch = value;
     setDirty(5);
   }
-  
+
   /**
-   * Checks the dirty status of the 'retriesSinceFetch' field. A field is 
dirty if it represents a change that has not yet been written to the database.
-   * The number of retried attempts at fetching the WebPage since it was last 
successfully fetched.   * @param value the value to set.
+   * Checks the dirty status of the 'retriesSinceFetch' field. A field is dirty
+   * if it represents a change that has not yet been written to the database.
+   * The number of retried attempts at fetching the WebPage since it was last
+   * successfully fetched. * @param value the value to set.
    */
   public boolean isRetriesSinceFetchDirty(java.lang.Integer value) {
     return isDirty(5);
   }
 
   /**
-   * Gets the value of the 'modifiedTime' field.
-   * The system time in milliseconds for when this WebPage was modified by the 
WebPage author, if this is not available we default to the server for this 
information. This is important to understand the changing nature of the 
WebPage.   */
+   * Gets the value of the 'modifiedTime' field. The system time in milliseconds
+   * for when this WebPage was modified by the WebPage author, if this is not
+   * available we default to the server for this information. This is important
+   * to understand the changing nature of the WebPage.
+   */
   public java.lang.Long getModifiedTime() {
     return modifiedTime;
   }
 
   /**
-   * Sets the value of the 'modifiedTime' field.
-   * The system time in milliseconds for when this WebPage was modified by the 
WebPage author, if this is not available we default to the server for this 
information. This is important to understand the changing nature of the 
WebPage.   * @param value the value to set.
+   * Sets the value of the 'modifiedTime' field. The system time in milliseconds
+   * for when this WebPage was modified by the WebPage author, if this is not
+   * available we default to the server for this information. This is important
+   * to understand the changing nature of the WebPage. * @param value the value
+   * to set.
    */
   public void setModifiedTime(java.lang.Long value) {
     this.modifiedTime = value;
     setDirty(6);
   }
-  
+
   /**
-   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if 
it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when this WebPage was modified by the 
WebPage author, if this is not available we default to the server for this 
information. This is important to understand the changing nature of the 
WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'modifiedTime' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * system time in milliseconds for when this WebPage was modified by the
+   * WebPage author, if this is not available we default to the server for this
+   * information. This is important to understand the changing nature of the
+   * WebPage. * @param value the value to set.
    */
   public boolean isModifiedTimeDirty(java.lang.Long value) {
     return isDirty(6);
   }
 
   /**
-   * Gets the value of the 'prevModifiedTime' field.
-   * The system time in milliseconds for when this WebPage was previously 
modified by the author, if this is not available then we default to the server 
for this information. This is important to understand the changing nature of a 
WebPage.   */
+   * Gets the value of the 'prevModifiedTime' field. The system time in
+   * milliseconds for when this WebPage was previously modified by the author,
+   * if this is not available then we default to the server for this
+   * information. This is important to understand the changing nature of a
+   * WebPage.
+   */
   public java.lang.Long getPrevModifiedTime() {
     return prevModifiedTime;
   }
 
   /**
-   * Sets the value of the 'prevModifiedTime' field.
-   * The system time in milliseconds for when this WebPage was previously 
modified by the author, if this is not available then we default to the server 
for this information. This is important to understand the changing nature of a 
WebPage.   * @param value the value to set.
+   * Sets the value of the 'prevModifiedTime' field. The system time in
+   * milliseconds for when this WebPage was previously modified by the author,
+   * if this is not available then we default to the server for this
+   * information. This is important to understand the changing nature of a
+   * WebPage. * @param value the value to set.
    */
   public void setPrevModifiedTime(java.lang.Long value) {
     this.prevModifiedTime = value;
     setDirty(7);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty 
if it represents a change that has not yet been written to the database.
-   * The system time in milliseconds for when this WebPage was previously 
modified by the author, if this is not available then we default to the server 
for this information. This is important to understand the changing nature of a 
WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'prevModifiedTime' field. A field is dirty
+   * if it represents a change that has not yet been written to the database.
+   * The system time in milliseconds for when this WebPage was previously
+   * modified by the author, if this is not available then we default to the
+   * server for this information. This is important to understand the changing
+   * nature of a WebPage. * @param value the value to set.
    */
   public boolean isPrevModifiedTimeDirty(java.lang.Long value) {
     return isDirty(7);
@@ -434,160 +642,210 @@ public class WebPage extends org.apache.
 
   /**
    * Sets the value of the 'protocolStatus' field.
-   * @param value the value to set.
+   * 
+   * @param value
+   *          the value to set.
    */
   public void setProtocolStatus(org.apache.nutch.storage.ProtocolStatus value) 
{
     this.protocolStatus = value;
     setDirty(8);
   }
-  
+
   /**
-   * Checks the dirty status of the 'protocolStatus' field. A field is dirty 
if it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Checks the dirty status of the 'protocolStatus' field. A field is dirty if
+   * it represents a change that has not yet been written to the database.
+   * 
+   * @param value
+   *          the value to set.
    */
-  public boolean isProtocolStatusDirty(org.apache.nutch.storage.ProtocolStatus 
value) {
+  public boolean isProtocolStatusDirty(
+      org.apache.nutch.storage.ProtocolStatus value) {
     return isDirty(8);
   }
 
   /**
-   * Gets the value of the 'content' field.
-   * The entire raw document content e.g. raw XHTML   */
+   * Gets the value of the 'content' field. The entire raw document content 
e.g.
+   * raw XHTML
+   */
   public java.nio.ByteBuffer getContent() {
     return content;
   }
 
   /**
-   * Sets the value of the 'content' field.
-   * The entire raw document content e.g. raw XHTML   * @param value the value 
to set.
+   * Sets the value of the 'content' field. The entire raw document content 
e.g.
+   * raw XHTML * @param value the value to set.
    */
   public void setContent(java.nio.ByteBuffer value) {
     this.content = value;
     setDirty(9);
   }
-  
+
   /**
-   * Checks the dirty status of the 'content' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * The entire raw document content e.g. raw XHTML   * @param value the value 
to set.
+   * Checks the dirty status of the 'content' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * entire raw document content e.g. raw XHTML * @param value the value to 
set.
    */
   public boolean isContentDirty(java.nio.ByteBuffer value) {
     return isDirty(9);
   }
 
   /**
-   * Gets the value of the 'contentType' field.
-   * The type of the content contained within the document itself. ContentType 
is an alias for MimeType. Historically, this parameter was only called 
MimeType, but since this is actually the value included in the HTTP 
Content-Type header, it can also include the character set encoding, which 
makes it more than just a MimeType specification. If MimeType is specified e.g. 
not None, that value is used. Otherwise, ContentType is used. If neither is 
given, the DEFAULT_CONTENT_TYPE setting is used.   */
+   * Gets the value of the 'contentType' field. The type of the content
+   * contained within the document itself. ContentType is an alias for 
MimeType.
+   * Historically, this parameter was only called MimeType, but since this is
+   * actually the value included in the HTTP Content-Type header, it can also
+   * include the character set encoding, which makes it more than just a
+   * MimeType specification. If MimeType is specified e.g. not None, that value
+   * is used. Otherwise, ContentType is used. If neither is given, the
+   * DEFAULT_CONTENT_TYPE setting is used.
+   */
   public java.lang.CharSequence getContentType() {
     return contentType;
   }
 
   /**
-   * Sets the value of the 'contentType' field.
-   * The type of the content contained within the document itself. ContentType 
is an alias for MimeType. Historically, this parameter was only called 
MimeType, but since this is actually the value included in the HTTP 
Content-Type header, it can also include the character set encoding, which 
makes it more than just a MimeType specification. If MimeType is specified e.g. 
not None, that value is used. Otherwise, ContentType is used. If neither is 
given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to 
set.
+   * Sets the value of the 'contentType' field. The type of the content
+   * contained within the document itself. ContentType is an alias for 
MimeType.
+   * Historically, this parameter was only called MimeType, but since this is
+   * actually the value included in the HTTP Content-Type header, it can also
+   * include the character set encoding, which makes it more than just a
+   * MimeType specification. If MimeType is specified e.g. not None, that value
+   * is used. Otherwise, ContentType is used. If neither is given, the
+   * DEFAULT_CONTENT_TYPE setting is used. * @param value the value to set.
    */
   public void setContentType(java.lang.CharSequence value) {
     this.contentType = value;
     setDirty(10);
   }
-  
+
   /**
-   * Checks the dirty status of the 'contentType' field. A field is dirty if 
it represents a change that has not yet been written to the database.
-   * The type of the content contained within the document itself. ContentType 
is an alias for MimeType. Historically, this parameter was only called 
MimeType, but since this is actually the value included in the HTTP 
Content-Type header, it can also include the character set encoding, which 
makes it more than just a MimeType specification. If MimeType is specified e.g. 
not None, that value is used. Otherwise, ContentType is used. If neither is 
given, the DEFAULT_CONTENT_TYPE setting is used.   * @param value the value to 
set.
+   * Checks the dirty status of the 'contentType' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The 
type
+   * of the content contained within the document itself. ContentType is an
+   * alias for MimeType. Historically, this parameter was only called MimeType,
+   * but since this is actually the value included in the HTTP Content-Type
+   * header, it can also include the character set encoding, which makes it 
more
+   * than just a MimeType specification. If MimeType is specified e.g. not 
None,
+   * that value is used. Otherwise, ContentType is used. If neither is given,
+   * the DEFAULT_CONTENT_TYPE setting is used. * @param value the value to set.
    */
   public boolean isContentTypeDirty(java.lang.CharSequence value) {
     return isDirty(10);
   }
 
   /**
-   * Gets the value of the 'prevSignature' field.
-   * An implementation of a WebPage's previous signature from which it can be 
identified and referenced at any point in time. This can be used to uniquely 
identify WebPage deltas based on page fingerprints.   */
+   * Gets the value of the 'prevSignature' field. An implementation of a
+   * WebPage's previous signature from which it can be identified and 
referenced
+   * at any point in time. This can be used to uniquely identify WebPage deltas
+   * based on page fingerprints.
+   */
   public java.nio.ByteBuffer getPrevSignature() {
     return prevSignature;
   }
 
   /**
-   * Sets the value of the 'prevSignature' field.
-   * An implementation of a WebPage's previous signature from which it can be 
identified and referenced at any point in time. This can be used to uniquely 
identify WebPage deltas based on page fingerprints.   * @param value the value 
to set.
+   * Sets the value of the 'prevSignature' field. An implementation of a
+   * WebPage's previous signature from which it can be identified and 
referenced
+   * at any point in time. This can be used to uniquely identify WebPage deltas
+   * based on page fingerprints. * @param value the value to set.
    */
   public void setPrevSignature(java.nio.ByteBuffer value) {
     this.prevSignature = value;
     setDirty(11);
   }
-  
+
   /**
-   * Checks the dirty status of the 'prevSignature' field. A field is dirty if 
it represents a change that has not yet been written to the database.
-   * An implementation of a WebPage's previous signature from which it can be 
identified and referenced at any point in time. This can be used to uniquely 
identify WebPage deltas based on page fingerprints.   * @param value the value 
to set.
+   * Checks the dirty status of the 'prevSignature' field. A field is dirty if
+   * it represents a change that has not yet been written to the database. An
+   * implementation of a WebPage's previous signature from which it can be
+   * identified and referenced at any point in time. This can be used to
+   * uniquely identify WebPage deltas based on page fingerprints. * @param 
value
+   * the value to set.
    */
   public boolean isPrevSignatureDirty(java.nio.ByteBuffer value) {
     return isDirty(11);
   }
 
   /**
-   * Gets the value of the 'signature' field.
-   * An implementation of a WebPage's signature from which it can be 
identified and referenced at any point in time. This is essentially the 
WebPage's fingerprint representing its state for any point in time.   */
+   * Gets the value of the 'signature' field. An implementation of a WebPage's
+   * signature from which it can be identified and referenced at any point in
+   * time. This is essentially the WebPage's fingerprint representing its state
+   * for any point in time.
+   */
   public java.nio.ByteBuffer getSignature() {
     return signature;
   }
 
   /**
-   * Sets the value of the 'signature' field.
-   * An implementation of a WebPage's signature from which it can be 
identified and referenced at any point in time. This is essentially the 
WebPage's fingerprint representing its state for any point in time.   * @param 
value the value to set.
+   * Sets the value of the 'signature' field. An implementation of a WebPage's
+   * signature from which it can be identified and referenced at any point in
+   * time. This is essentially the WebPage's fingerprint representing its state
+   * for any point in time. * @param value the value to set.
    */
   public void setSignature(java.nio.ByteBuffer value) {
     this.signature = value;
     setDirty(12);
   }
-  
+
   /**
-   * Checks the dirty status of the 'signature' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * An implementation of a WebPage's signature from which it can be 
identified and referenced at any point in time. This is essentially the 
WebPage's fingerprint representing its state for any point in time.   * @param 
value the value to set.
+   * Checks the dirty status of the 'signature' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. An
+   * implementation of a WebPage's signature from which it can be identified 
and
+   * referenced at any point in time. This is essentially the WebPage's
+   * fingerprint representing its state for any point in time. * @param value 
the
+   * value to set.
    */
   public boolean isSignatureDirty(java.nio.ByteBuffer value) {
     return isDirty(12);
   }
 
   /**
-   * Gets the value of the 'title' field.
-   * The title of the WebPage.   */
+   * Gets the value of the 'title' field. The title of the WebPage.
+   */
   public java.lang.CharSequence getTitle() {
     return title;
   }
 
   /**
-   * Sets the value of the 'title' field.
-   * The title of the WebPage.   * @param value the value to set.
+   * Sets the value of the 'title' field. The title of the WebPage. * @param
+   * value the value to set.
    */
   public void setTitle(java.lang.CharSequence value) {
     this.title = value;
     setDirty(13);
   }
-  
+
   /**
-   * Checks the dirty status of the 'title' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * The title of the WebPage.   * @param value the value to set.
+   * Checks the dirty status of the 'title' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * title of the WebPage. * @param value the value to set.
    */
   public boolean isTitleDirty(java.lang.CharSequence value) {
     return isDirty(13);
   }
 
   /**
-   * Gets the value of the 'text' field.
-   * The textual content of the WebPage devoid from native markup.   */
+   * Gets the value of the 'text' field. The textual content of the WebPage
+   * devoid from native markup.
+   */
   public java.lang.CharSequence getText() {
     return text;
   }
 
   /**
-   * Sets the value of the 'text' field.
-   * The textual content of the WebPage devoid from native markup.   * @param 
value the value to set.
+   * Sets the value of the 'text' field. The textual content of the WebPage
+   * devoid from native markup. * @param value the value to set.
    */
   public void setText(java.lang.CharSequence value) {
     this.text = value;
     setDirty(14);
   }
-  
+
   /**
-   * Checks the dirty status of the 'text' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * The textual content of the WebPage devoid from native markup.   * @param 
value the value to set.
+   * Checks the dirty status of the 'text' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. The
+   * textual content of the WebPage devoid from native markup. * @param value
+   * the value to set.
    */
   public boolean isTextDirty(java.lang.CharSequence value) {
     return isDirty(14);
@@ -602,208 +860,292 @@ public class WebPage extends org.apache.
 
   /**
    * Sets the value of the 'parseStatus' field.
-   * @param value the value to set.
+   * 
+   * @param value
+   *          the value to set.
    */
   public void setParseStatus(org.apache.nutch.storage.ParseStatus value) {
     this.parseStatus = value;
     setDirty(15);
   }
-  
+
   /**
-   * Checks the dirty status of the 'parseStatus' field. A field is dirty if 
it represents a change that has not yet been written to the database.
-   * @param value the value to set.
+   * Checks the dirty status of the 'parseStatus' field. A field is dirty if it
+   * represents a change that has not yet been written to the database.
+   * 
+   * @param value
+   *          the value to set.
    */
   public boolean isParseStatusDirty(org.apache.nutch.storage.ParseStatus 
value) {
     return isDirty(15);
   }
 
   /**
-   * Gets the value of the 'score' field.
-   * A score used to determine a WebPage's relevance within the web graph it 
is part of. This score may change over time based on graph characteristics.   */
+   * Gets the value of the 'score' field. A score used to determine a WebPage's
+   * relevance within the web graph it is part of. This score may change over
+   * time based on graph characteristics.
+   */
   public java.lang.Float getScore() {
     return score;
   }
 
   /**
-   * Sets the value of the 'score' field.
-   * A score used to determine a WebPage's relevance within the web graph it 
is part of. This score may change over time based on graph characteristics.   * 
@param value the value to set.
+   * Sets the value of the 'score' field. A score used to determine a WebPage's
+   * relevance within the web graph it is part of. This score may change over
+   * time based on graph characteristics. * @param value the value to set.
    */
   public void setScore(java.lang.Float value) {
     this.score = value;
     setDirty(16);
   }
-  
+
   /**
-   * Checks the dirty status of the 'score' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * A score used to determine a WebPage's relevance within the web graph it 
is part of. This score may change over time based on graph characteristics.   * 
@param value the value to set.
+   * Checks the dirty status of the 'score' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. A score
+   * used to determine a WebPage's relevance within the web graph it is part 
of.
+   * This score may change over time based on graph characteristics. * @param
+   * value the value to set.
    */
   public boolean isScoreDirty(java.lang.Float value) {
     return isDirty(16);
   }
 
   /**
-   * Gets the value of the 'reprUrl' field.
-   * In the case where we are given two urls, a source and a destination of a 
redirect, we should determine and persist the representative url. The logic 
used to determine this is based largely on Yahoo!'s Slurp Crawler   */
+   * Gets the value of the 'reprUrl' field. In the case where we are given two
+   * urls, a source and a destination of a redirect, we should determine and
+   * persist the representative url. The logic used to determine this is based
+   * largely on Yahoo!'s Slurp Crawler
+   */
   public java.lang.CharSequence getReprUrl() {
     return reprUrl;
   }
 
   /**
-   * Sets the value of the 'reprUrl' field.
-   * In the case where we are given two urls, a source and a destination of a 
redirect, we should determine and persist the representative url. The logic 
used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param 
value the value to set.
+   * Sets the value of the 'reprUrl' field. In the case where we are given two
+   * urls, a source and a destination of a redirect, we should determine and
+   * persist the representative url. The logic used to determine this is based
+   * largely on Yahoo!'s Slurp Crawler * @param value the value to set.
    */
   public void setReprUrl(java.lang.CharSequence value) {
     this.reprUrl = value;
     setDirty(17);
   }
-  
+
   /**
-   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * In the case where we are given two urls, a source and a destination of a 
redirect, we should determine and persist the representative url. The logic 
used to determine this is based largely on Yahoo!'s Slurp Crawler   * @param 
value the value to set.
+   * Checks the dirty status of the 'reprUrl' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. In the
+   * case where we are given two urls, a source and a destination of a 
redirect,
+   * we should determine and persist the representative url. The logic used to
+   * determine this is based largely on Yahoo!'s Slurp Crawler * @param value
+   * the value to set.
    */
   public boolean isReprUrlDirty(java.lang.CharSequence value) {
     return isDirty(17);
   }
 
   /**
-   * Gets the value of the 'headers' field.
-   * Header information returned from the web server used to serve the 
content which is subsequently fetched from. This includes keys such as 
TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, 
CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED 
and LOCATION.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
getHeaders() {
+   * Gets the value of the 'headers' field. Header information returned from 
the
+   * web server used to serve the content which is subsequently fetched from.
+   * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+   * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+   * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> 
getHeaders() {
     return headers;
   }
 
   /**
-   * Sets the value of the 'headers' field.
-   * Header information returned from the web server used to serve the 
content which is subsequently fetched from. This includes keys such as 
TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, 
CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED 
and LOCATION.   * @param value the value to set.
-   */
-  public void 
setHeaders(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+   * Sets the value of the 'headers' field. Header information returned from 
the
+   * web server used to serve the content which is subsequently fetched from.
+   * This includes keys such as TRANSFER_ENCODING, CONTENT_ENCODING,
+   * CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION, CONTENT_DISPOSITION,
+   * CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and LOCATION. * @param value the
+   * value to set.
+   */
+  public void setHeaders(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.headers = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(18);
   }
-  
+
   /**
-   * Checks the dirty status of the 'headers' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * Header information returned from the web server used to serve the 
content which is subsequently fetched from. This includes keys such as 
TRANSFER_ENCODING, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, 
CONTENT_LOCATION, CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED 
and LOCATION.   * @param value the value to set.
+   * Checks the dirty status of the 'headers' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. Header
+   * information returned from the web server used to serve the content which
+   * is subsequently fetched from. This includes keys such as 
TRANSFER_ENCODING,
+   * CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_LOCATION,
+   * CONTENT_DISPOSITION, CONTENT_MD5, CONTENT_TYPE, LAST_MODIFIED and 
LOCATION.
+   * * @param value the value to set.
    */
-  public boolean 
isHeadersDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
value) {
+  public boolean isHeadersDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(18);
   }
 
   /**
-   * Gets the value of the 'outlinks' field.
-   * Embedded hyperlinks which direct outside of the current domain.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
getOutlinks() {
+   * Gets the value of the 'outlinks' field. Embedded hyperlinks which direct
+   * outside of the current domain.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> 
getOutlinks() {
     return outlinks;
   }
 
   /**
-   * Sets the value of the 'outlinks' field.
-   * Embedded hyperlinks which direct outside of the current domain.   * 
@param value the value to set.
+   * Sets the value of the 'outlinks' field. Embedded hyperlinks which direct
+   * outside of the current domain. * @param value the value to set.
    */
-  public void 
setOutlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) 
{
-    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setOutlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.outlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(19);
   }
-  
+
   /**
-   * Checks the dirty status of the 'outlinks' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * Embedded hyperlinks which direct outside of the current domain.   * 
@param value the value to set.
+   * Checks the dirty status of the 'outlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. 
Embedded
+   * hyperlinks which direct outside of the current domain. * @param value the
+   * value to set.
    */
-  public boolean 
isOutlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
value) {
+  public boolean isOutlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(19);
   }
 
   /**
-   * Gets the value of the 'inlinks' field.
-   * Embedded hyperlinks which link to pages within the current domain.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
getInlinks() {
+   * Gets the value of the 'inlinks' field. Embedded hyperlinks which link to
+   * pages within the current domain.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> 
getInlinks() {
     return inlinks;
   }
 
   /**
-   * Sets the value of the 'inlinks' field.
-   * Embedded hyperlinks which link to pages within the current domain.   * 
@param value the value to set.
+   * Sets the value of the 'inlinks' field. Embedded hyperlinks which link to
+   * pages within the current domain. * @param value the value to set.
    */
-  public void 
setInlinks(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+  public void setInlinks(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.inlinks = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(20);
   }
-  
+
   /**
-   * Checks the dirty status of the 'inlinks' field. A field is dirty if it 
represents a change that has not yet been written to the database.
-   * Embedded hyperlinks which link to pages within the current domain.   * 
@param value the value to set.
+   * Checks the dirty status of the 'inlinks' field. A field is dirty if it
+   * represents a change that has not yet been written to the database. 
Embedded
+   * hyperlinks which link to pages within the current domain. * @param value
+   * the value to set.
    */
-  public boolean 
isInlinksDirty(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
value) {
+  public boolean isInlinksDirty(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
     return isDirty(20);
   }
 
   /**
-   * Gets the value of the 'markers' field.
-   * Markers flags which represent user and machine decisions which have 
affected or influenced a WebPage's current state. Markers can be system specific 
and user machine driven in nature. They are assigned to a WebPage on a 
job-by-job basis and their values indicative of what actions should be 
associated with a WebPage.   */
-  public java.util.Map<java.lang.CharSequence,java.lang.CharSequence> 
getMarkers() {
+   * Gets the value of the 'markers' field. Markers flags which represent user
+   * and machine decisions which have affected or influenced a WebPage's current
+   * state. Markers can be system specific and user machine driven in nature.
+   * They are assigned to a WebPage on a job-by-job basis and their values
+   * indicative of what actions should be associated with a WebPage.
+   */
+  public java.util.Map<java.lang.CharSequence, java.lang.CharSequence> 
getMarkers() {
     return markers;
   }
 
   /**
-   * Sets the value of the 'markers' field.
-   * Markers flags which represent user and machine decisions which have 
affected or influenced a WebPage's current state. Markers can be system specific 
and user machine driven in nature. They are assigned to a WebPage on a 
job-by-job basis and their values indicative of what actions should be 
associated with a WebPage.   * @param value the value to set.
-   */
-  public void 
setMarkers(java.util.Map<java.lang.CharSequence,java.lang.CharSequence> value) {
-    this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
+   * Sets the value of the 'markers' field. Markers flags which represent user
+   * and machine decisions which have affected or influenced a WebPage's current
+   * state. Markers can be system specific and user machine driven in nature.
+   * They are assigned to a WebPage on a job-by-job basis and their values
+   * indicative of what actions should be associated with a WebPage. * @param
+   * value the value to set.
+   */
+  public void setMarkers(
+      java.util.Map<java.lang.CharSequence, java.lang.CharSequence> value) {
+    this.markers = (value instanceof org.apache.gora.persistency.Dirtyable) ? 
value
+        : new org.apache.gora.persistency.impl.DirtyMapWrapper(value);
     setDirty(21);
   }

[... 2576 lines stripped ...]

Reply via email to