For reference, here is my code listing. Some points to note:
1. In debug mode (compiler flag -g) the relative path does not work; I have to use the absolute path for opendir to work.
2. I had to remove the 'm' characters in the fscanf format, changing it from "%m[^\r\n] %m[\x01-\x7F]" to "%[^\r\n] %[\x01-\x7F]". After removing the 'm', the Release executable runs but does nothing; with the 'm' it just gives the error message that it cannot extract the title/bodytext.
3. With the 'm' characters removed, debug mode shows a segmentation fault at this line: String *value = Str_new_from_utf8(title, strlen(title));
Any help and pointers to help me get started would be very helpful. Thanks, Milind -------------------------------------------------CODE BELOW---------------------------------------------------------- /* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <dirent.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define CFISH_USE_SHORT_NAMES #define LUCY_USE_SHORT_NAMES #include "Clownfish/String.h" #include "Lucy/Simple.h" #include "Lucy/Document/Doc.h" const char path_to_index[] = "D:/lucy_index"; //const char uscon_source[] = "E:/Milind/Technical/My Work/My Programs/C_C++/__Playground/Apache Lucy/SimpleTutorial/common/sample/us_constitution"; const char uscon_source[] = "../../common/sample/us_constitution"; bool S_ends_with(const char *str, const char *postfix) { size_t len = strlen(str); size_t postfix_len = strlen(postfix); return len >= postfix_len && memcmp(str + len - postfix_len, postfix, postfix_len) == 0; } Doc* S_parse_file(const char *filename) { size_t bytes = strlen(uscon_source) + 1 + strlen(filename) + 1; char *path = (char*)malloc(bytes); path[0] = '\0'; strcat(path, uscon_source); strcat(path, "/"); strcat(path, filename); FILE *stream = fopen(path, "rb"); if (stream == NULL) { perror(path); exit(1); } char *title 
= NULL; char *bodytext = NULL; if (fscanf(stream, "%[^\r\n] %[\x01-\x7F]", &title, &bodytext) != 2) { fprintf(stderr, "Can't extract title/bodytext from '%s'", path); exit(1); } Doc *doc = Doc_new(NULL, 0); { // Store 'title' field String *field = Str_newf("title"); String *value = Str_new_from_utf8(title, strlen(title)); Doc_Store(doc, field, (Obj*)value); DECREF(field); DECREF(value); } { // Store 'content' field String *field = Str_newf("content"); String *value = Str_new_from_utf8(bodytext, strlen(bodytext)); Doc_Store(doc, field, (Obj*)value); DECREF(field); DECREF(value); } { // Store 'url' field String *field = Str_newf("url"); String *value = Str_new_from_utf8(filename, strlen(filename)); Doc_Store(doc, field, (Obj*)value); DECREF(field); DECREF(value); } fclose(stream); free(bodytext); free(title); free(path); return doc; } int main() { // Initialize the library. lucy_bootstrap_parcel(); String *folder = Str_newf("%s", path_to_index); String *language = Str_newf("en"); Simple *lucy = Simple_new((Obj*)folder, language); DIR *dir = opendir(uscon_source); if (dir == NULL) { perror(uscon_source); return 1; } printf("Directory opened\n"); for (struct dirent *entry = readdir(dir); entry; entry = readdir(dir)) { if (S_ends_with(entry->d_name, ".txt")) { Doc *doc = S_parse_file(entry->d_name); Simple_Add_Doc(lucy, doc); // ta-da! DECREF(doc); } } closedir(dir); DECREF(lucy); DECREF(language); DECREF(folder); return 0; } --------------------------------------------CODE ENDS-------------------------------------------------- On Fri, Jul 1, 2016 at 11:53 AM, Milind Gupta <[email protected]> wrote: > Can anyone help me running the SimpleTutorial. > > Thanks, > Milind > > On Wed, Jun 1, 2016 at 3:53 PM, Milind Gupta <[email protected]> > wrote: > >> Hi, >> I tried running the Simple Tutorial example given on teh page: >> http://lucy.apache.org/docs/perl/Lucy/Docs/Tutorial/SimpleTutorial.html. >> It compiles fine. 
When I ran it I got an error saying "Can't extract >> title/bodytext from amend1.txt". After I changed the pattern in the fscanf >> by removing the 2 m's after the % signs then it worked. But I don't see the >> index file being created. Running the Search program returns that it cannot >> find the index file. Is there some command missing to actually write the >> index file to the disk? >> I am running this on Windows 10 and this is 0.5.1 version of >> Apache Lucy. >> >> Thanks, >> Milind >> >> >
