Dear Wiki user, you have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notifications.
The "NutchFileFormats" page has been changed by LewisJohnMcgibbney: https://wiki.apache.org/nutch/NutchFileFormats?action=diff&rev1=8&rev2=9 == File Formats == - {{{#!CSV , + - file,key datatype,value datatype,codec + ||file||key datatype||value datatype||codec|| - data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum, + ||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec|| - index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - }}} + = LinkDB = @@ -110, +110 @@ == File Formats == - {{{#!CSV , - file,key datatype,value datatype,codec + ||file||key datatype||value datatype||codec|| - data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.Inlinks,org.apache.hadoop.io.compress.DefaultCodec + ||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.Inlinks||org.apache.hadoop.io.compress.DefaultCodec|| - index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - }}} = Segments = @@ -155, +153 @@ == Description == - {{{#!CSV , - Subdirectory,file,key datatype,value datatype,codec - content,data,org.apache.hadoop.io.Text,org.apache.nutch.protocol.Content,org.apache.hadoop.io.compress.DefaultCodec + ||content||data||org.apache.hadoop.io.Text||org.apache.nutch.protocol.Content||org.apache.hadoop.io.compress.DefaultCodec|| - content,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||content||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - 
crawl_fetch,data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,org.apache.hadoop.io.compress.DefaultCodec + ||crawl_fetch||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec|| - crawl_fetch,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||crawl_fetch||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - crawl_generate,part-0000,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,org.apache.hadoop.io.compress.DefaultCodec + ||crawl_generate||part-0000||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec|| - crawl_parse,data,org.apache.hadoop.io.Text,org.apache.nutch.crawl.CrawlDatum,org.apache.hadoop.io.compress.DefaultCodec + ||crawl_parse||data||org.apache.hadoop.io.Text||org.apache.nutch.crawl.CrawlDatum||org.apache.hadoop.io.compress.DefaultCodec|| - crawl_parse,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||crawl_parse||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - parse_data,data,org.apache.hadoop.io.Text,org.apache.nutch.parse.ParseData,org.apache.hadoop.io.compress.DefaultCodec + ||parse_data||data||org.apache.hadoop.io.Text||org.apache.nutch.parse.ParseData||org.apache.hadoop.io.compress.DefaultCodec|| - parse_data,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||parse_data||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - parse_text,data,org.apache.hadoop.io.Text,org.apache.nutch.parse.ParseText,org.apache.hadoop.io.compress.DefaultCodec + 
||parse_text||data||org.apache.hadoop.io.Text||org.apache.nutch.parse.ParseText||org.apache.hadoop.io.compress.DefaultCodec|| - parse_text,index,org.apache.hadoop.io.Text,org.apache.hadoop.io.LongWritable,org.apache.hadoop.io.compress.DefaultCodec + ||parse_text||index||org.apache.hadoop.io.Text||org.apache.hadoop.io.LongWritable||org.apache.hadoop.io.compress.DefaultCodec|| - }}} = Old File Format Documentation =