I am using this code, which is pretty basic, and it won't index the documents. I run the index code and print each document to make sure that it gets indexed, but when I look at the output "gen" and "segments" files, there are only about 20 bytes of data in them. I am indexing about 300k of text data. I am using Scala, but I don't think that is the issue, as I have used similar code before. When I do a search, I get 0 documents for every query.
I am using Lucene 2.2.0 (just downloaded). Here is the core of my code:

def indexData(writer: IndexWriter, file: File) {
  val doc = new LucDocument()
  // Read the content from the file
  val contentReader = new ContentReader(file.getAbsolutePath)
  val (title, content) = contentReader.readFile()
  // Extract data from the java File class
  val link = new DocumentLink(file.getAbsolutePath, file.getName, content, file.getAbsolutePath)
  // Index the document and data.
  doc.add(new Field(LUC_KEY_FULL_PATH, link.fullPath, Field.Store.YES, Field.Index.TOKENIZED))
  doc.add(new Field(LUC_KEY_FILE_NAME, link.filename, Field.Store.YES, Field.Index.TOKENIZED))
  doc.add(new Field(LUC_KEY_CONTENT, link.content, Field.Store.YES, Field.Index.TOKENIZED))
  doc.add(new Field(LUC_KEY_IDENTITY, link.id, Field.Store.YES, Field.Index.UN_TOKENIZED))
  writer.addDocument(doc)
}

But if you want the full scala source, here it is. Think of it as pseudo code:

object BotlistIndexDocuments {

  // Names of the Lucene fields written for every indexed document.
  val LUC_KEY_FULL_PATH = "full_path"
  val LUC_KEY_FILE_NAME = "file_name"
  val LUC_KEY_CONTENT = "content"
  val LUC_KEY_IDENTITY = "id"

  //
  // Read the content file. The first line should contain
  // a "#title summary" line and the rest of the document
  // will contain the "wiki" document.
  //
  class ContentReader(filename: String) {
    // Returns a (title, body) pair: the first line of the file and the
    // concatenation of all remaining lines.
    def readFile(): (String, String) = {
      // NOTE(review): the Source opened here is never explicitly closed.
      val file = Source.fromFile(filename)
      var counted = file.getLines.counted
      val fileData = new StringBuilder()
      var title = ""
      counted.foreach { (line: String) =>
        // presumably counted.count is the 0-based index of the line just
        // returned, so only the first line is taken as the title -- confirm
        // against the Scala version in use.
        if (counted.count == 0) {
          //title = line.substring(6).trim()
          title = line
        } else {
          fileData.append(line)
        }
      }
      (title, fileData.toString())
    }
  } // End of Class

  //
  // Value holder describing one document to index: its absolute path,
  // file name, text content and unique identifier.
  //
  case class DocumentLink(abs_path: String, file: String, data: String, unique_id: String) {
    val fullPath = abs_path
    val filename = file
    val content = data
    val id = unique_id
  }

  //
  // Build one Lucene document for the given file and add it to the writer.
  // The absolute path is used both as the "full_path" field and as the
  // untokenized "id" field.
  //
  def indexData(writer: IndexWriter, file: File) {
    val doc = new LucDocument()
    // Read the content from the file
    val contentReader = new ContentReader(file.getAbsolutePath)
    // NOTE(review): title is read but not added to the document.
    val (title, content) = contentReader.readFile()
    // Extract data from the java File class
    val link = new DocumentLink(file.getAbsolutePath, file.getName, content, file.getAbsolutePath)
    // Index the document and data.
    doc.add(new Field(LUC_KEY_FULL_PATH, link.fullPath, Field.Store.YES, Field.Index.TOKENIZED))
    doc.add(new Field(LUC_KEY_FILE_NAME, link.filename, Field.Store.YES, Field.Index.TOKENIZED))
    doc.add(new Field(LUC_KEY_CONTENT, link.content, Field.Store.YES, Field.Index.TOKENIZED))
    doc.add(new Field(LUC_KEY_IDENTITY, link.id, Field.Store.YES, Field.Index.UN_TOKENIZED))
    writer.addDocument(doc)
  }

  //
  // Utility for recursively walking directory tree
  // See:
  // override final def flatMap [B](f : (A) => Iterable[B]) : List[B]
  //
  class DocWalkFile(file: File) {
    // Direct children of this file: its directory listing, or nothing
    // when it is not a directory.
    def children = new Iterable[File] {
      def elements =
        if (file.isDirectory) file.listFiles.elements else Iterator.empty;
    }
    // This file followed by everything below it in the tree.
    def andTree : Iterable[File] = (
      Seq.single(file) ++ children.flatMap(child => new DocWalkFile(child).andTree))
  }

  // Every entry under dir (including dir itself) whose name ends in
  // ".java" or ".txt".
  def listDocuments(dir: File): List[File] =
    (new DocWalkFile(dir)).andTree.toList filter (f =>
      (f.getName.endsWith(".java") || f.getName.endsWith(".txt")))

  //
  // Create a fresh index in index_dir (create flag = true) and add every
  // file in files to it.
  //
  def indexDocuments(index_dir: File, files: List[File]) {
    Console.println("INFO: number of files to index=" + files.length)
    val writer = new IndexWriter(index_dir, new StandardAnalyzer(), true)
    try {
      for (val file <- files) {
        indexData(writer, file)
      }
    } finally {
      // The writer buffers added documents; without close() they are never
      // flushed to the index directory, which leaves only tiny "segments"
      // files on disk and makes every search return 0 hits.
      writer.close()
    }
  }

  //
  // Entry point. Expects two arguments: the parent directory in which the
  // "index" directory lives (or is created), and the directory of input
  // documents to index.
  //
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      Console.println("usage: java BotlistIndexDocuments parent-index-dir input-doc-dir")
      Console.println("\n")
      Console.println("\nRun the BotlistIndexDocuments index tool on the provided index directory.")
      Console.println("\nFor bug reporting instructions, please see:")
      Console.println("<URL:http://code.google.com/p/openbotlist>.")
      return
    }
    Console.println("INFO: Indexing Document Data <standby> ...")
    val index = new File(args(0) + "/index")
    val doc_dir = new File(args(1))
    if (!index.exists()) {
      index.mkdir();
      Console.println("Creating index directory.")
    } else {
      // An existing index is only warned about; indexDocuments recreates it.
      Console.println("WARN: Index already exists (remove directory to continue)")
      Console.println("DIR: " + index.getAbsolutePath())
      //return
    }
    // Calculate the processing time to run application
    val timeStart = System.currentTimeMillis()
    indexDocuments(index, (listDocuments(doc_dir)))
    val timeEnd = System.currentTimeMillis()
    Console.println("Done...")
    Console.println("Completed processing in " + (timeEnd - timeStart) + " ms.")
  }
}

--
Berlin Brown [berlin dot brown at gmail dot com]
http://botspiritcompany.com/botlist/?

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]