Added: incubator/lucene4c/trunk/include/lcn_istream.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_istream.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_istream.h (added) +++ incubator/lucene4c/trunk/include/lcn_istream.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,144 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lcn_istream.h + * @brief Routines for manipulating read-only streams of data + */ + +#ifndef _LCN_ISTREAM_H +#define _LCN_ISTREAM_H + +#include "lcn_types.h" + +#include <apr_file_io.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Opaque structure representing a stream of data. */ +typedef struct lcn_istream_t lcn_istream_t; + +/** Return a memory backed istream containing @a length bytes of data from + * @a buffer, the stream is allocated from @a pool. + */ +lcn_istream_t * +lcn_ram_istream_create (const char *buffer, + apr_size_t length, + apr_pool_t *pool); + +/** Return a file backed istream that wraps @a file, allocated from @a pool. */ +lcn_istream_t * +lcn_file_istream_create (apr_file_t *file, apr_pool_t *pool); + +/** Read a single byte from @a stream into @a byte, using @a pool for any + * allocation. 
+ */ +lcn_error_t * +lcn_istream_read_byte (lcn_istream_t *stream, + char *byte, + apr_pool_t *pool); + +/** Read @a len bytes from @a stream into @a buffer, using @a pool for any + * allocation. + */ +lcn_error_t * +lcn_istream_read_bytes (lcn_istream_t *stream, + char *buffer, + apr_size_t len, + apr_pool_t *pool); + +/** Read a 32 bit integer from @a stream into @a i, using @a pool for any + * allocation. + */ +lcn_error_t * +lcn_istream_read_int (lcn_istream_t *stream, + apr_uint32_t *i, + apr_pool_t *pool); + +/** Read a variable length 32 bit integer from @a stream into @a i, using + * @a pool for any allocation. + */ +lcn_error_t * +lcn_istream_read_vint (lcn_istream_t *stream, + apr_uint32_t *i, + apr_pool_t *pool); + +/** Read a 64 bit integer from @a stream into @a l, using @a pool for any + * allocation. + */ +lcn_error_t * +lcn_istream_read_long (lcn_istream_t *stream, + apr_uint64_t *l, + apr_pool_t *pool); + +/** Read a variable length 64 bit integer from @a stream into @a l, using + * @a pool for any allocation. + */ +lcn_error_t * +lcn_istream_read_vlong (lcn_istream_t *stream, + apr_uint64_t *l, + apr_pool_t *pool); + +/** Read a string from @a stream into @a str, allocated from @a pool. */ +lcn_error_t * +lcn_istream_read_string (lcn_istream_t *stream, + lcn_char_t **str, + apr_pool_t *pool); + +/** Read @a length chars from @a stream, returning them in @a buffer, using + * @a pool for allocation. */ +lcn_error_t * +lcn_istream_read_chars (lcn_istream_t *stream, + lcn_char_t **buffer, + apr_size_t length, + apr_pool_t *pool); + +/** Seek @a stream to @a offset, using @a pool for temporary allocations. */ +lcn_error_t * +lcn_istream_seek (lcn_istream_t *stream, apr_off_t offset, apr_pool_t *pool); + +/** Close @a stream, using @a pool for temporary allocations. */ +lcn_error_t * +lcn_istream_close (lcn_istream_t *stream, apr_pool_t *pool); + +/** Return the length of @a stream in @a length, using @a pool for temporary + * allocations. 
+ */ +lcn_error_t * +lcn_istream_length (lcn_istream_t *stream, + apr_size_t *length, + apr_pool_t *pool); + +/** Return the current position of @a stream in @a offset, using @a pool for + * temporary allocations. + */ +lcn_error_t * +lcn_istream_offset (lcn_istream_t *stream, + apr_off_t *offset, + apr_pool_t *pool); + +/** Return a duplicate of @a source in @a dest, allocated within @a pool. */ +lcn_error_t * +lcn_istream_duplicate (lcn_istream_t **dest, + lcn_istream_t *source, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif
Added: incubator/lucene4c/trunk/include/lcn_query.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_query.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_query.h (added) +++ incubator/lucene4c/trunk/include/lcn_query.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,83 @@ +/* Copyright 2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lcn_query.h + * @brief Routines for working with queries + */ + +#ifndef _LCN_QUERY_H +#define _LCN_QUERY_H + +#include "lcn_types.h" +#include "lcn_scorer.h" +#include "lcn_term.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Create a @a query that matches @a term, allocated in @a pool. */ +lcn_error_t * +lcn_term_query_create (lcn_query_t **query, + lcn_term_t *term, + apr_pool_t *pool); + +/** Create a @a query that matches a number of other queries, each of which + * either MUST occur, SHOULD occur, or MUST NOT occur in the documents matched, + * the query is allocated in @a pool. + */ +lcn_error_t * +lcn_boolean_query_create (lcn_query_t **query, apr_pool_t *pool); + +/** An indication of what requirement is placed on a particular part of a + * boolean query. 
+ */ +typedef enum { + LCN_MUST, /**< The query must match */ + LCN_SHOULD, /**< At least one of the queries with this occur must match */ + LCN_MUST_NOT /**< The query must not match */ +} lcn_boolean_clause_occur_t; + +/** Add @a clause to @a query, with @a occur as the specification for whether + * it is required or not. + * + * @note all clauses that are added to @a query must be allocated from a pool + * that lives at least as long as the pool used to allocate @a query, or the + * behavior is undefined. + */ +lcn_error_t * +lcn_boolean_query_add (lcn_query_t *query, + lcn_query_t *clause, + lcn_boolean_clause_occur_t occur); + +/** Return a @a scorer for @a query run over @a index, allocated in + * @a pool. + * + * @note the Java Lucene version of this stuff works on a Weight, not a + * Query, but that's mainly because you are supposed to be able to reuse + * a Query, so we can make that split later. + */ +lcn_error_t * +lcn_query_scorer (lcn_scorer_t **scorer, + lcn_query_t *query, + lcn_index_t *index, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _LCN_QUERY_H */ Added: incubator/lucene4c/trunk/include/lcn_scorer.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_scorer.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_scorer.h (added) +++ incubator/lucene4c/trunk/include/lcn_scorer.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,74 @@ +/* Copyright 2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lcn_scorer.h + * @brief Routines for working with scorers + */ + +#ifndef _LCN_SCORER_H +#define _LCN_SCORER_H + +#include "lcn_types.h" +#include "lcn_index.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Abstract scorer object. */ +typedef struct lcn_scorer_t lcn_scorer_t; + +/** Abstract query object. + * + * This is declared here to avoid problems with circular dependencies + * between queries and scorers. + */ +typedef struct lcn_query_t lcn_query_t; + +/** Create a @a scorer that returns scores for each document matched by + * @a query in @a iter, allocated in @a pool. + */ +lcn_error_t * +lcn_term_scorer_create (lcn_scorer_t **scorer, + lcn_doc_iter_t *iter, + apr_pool_t *pool); + +/** Create a @a scorer that returns scores for documents matched by the + * arrays of queries @a must, @a should, and @a must_not, searching in + * @a index, allocated in @a pool. + */ +lcn_error_t * +lcn_boolean_scorer_create (lcn_scorer_t **scorer, + apr_array_header_t *must, + apr_array_header_t *should, + apr_array_header_t *must_not, + lcn_index_t *index, + apr_pool_t *pool); + +/** Advance @a scorer to the next document. */ +lcn_error_t * lcn_scorer_next (lcn_scorer_t *scorer); + +/** Return the current document for @a scorer. */ +apr_uint32_t lcn_scorer_doc (lcn_scorer_t *scorer); + +/** Return the score for the current document for @a scorer. 
*/ +float lcn_scorer_score (lcn_scorer_t *scorer); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _LCN_SCORER_H */ Added: incubator/lucene4c/trunk/include/lcn_segment.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_segment.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_segment.h (added) +++ incubator/lucene4c/trunk/include/lcn_segment.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,83 @@ +/* Copyright 2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lcn_segment.h + * @brief Routines for manipulating a segment of a lucene index + */ + +#ifndef _LCN_SEGMENT_H +#define _LCN_SEGMENT_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include "lcn_types.h" +#include "lcn_directory.h" +#include "lcn_term.h" +#include "lcn_document.h" + +/** An opaque structure representing a segment of a lucene index. */ +typedef struct lcn_segment_t lcn_segment_t; + +/** Open the segment @a segname, of size @a size, within directory @a d and + * return it in @a segment, allocated from @a pool. + */ +lcn_error_t * +lcn_segment_open (lcn_segment_t **segment, + const lcn_char_t *segname, + apr_uint32_t size, + const lcn_directory_t *d, + apr_pool_t *pool); + +/** Return the name of @a segment. 
*/ +const lcn_char_t * +lcn_segment_name (const lcn_segment_t *segment); + +/** Return the size (in documents) of @a segment. */ +apr_uint32_t +lcn_segment_size (const lcn_segment_t *segment); + +/** Return the directory @a segment is in. */ +const lcn_directory_t * +lcn_segment_directory (const lcn_segment_t *segment); + +/** Return the document frequency @a doc_freq, document numbers @a docs, and + * the frequencies @a freqs of the documents within @a segment that contain + * @a term, using @a pool for allocation. + */ +lcn_error_t * +lcn_segment_term_docs (apr_uint32_t *doc_freq, + apr_uint32_t **docs, + apr_uint32_t **freqs, + lcn_segment_t *segment, + const lcn_term_t *term, + apr_pool_t *pool); + +/** Return the document @a doc from @a segment that has the number @a docnum, + * allocated within @a pool. + */ +lcn_error_t * +lcn_segment_get_document (lcn_document_t **doc, + lcn_segment_t *segment, + apr_uint32_t docnum, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif Added: incubator/lucene4c/trunk/include/lcn_segments.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_segments.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_segments.h (added) +++ incubator/lucene4c/trunk/include/lcn_segments.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,99 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lcn_segments.h + * @brief Routines for reading the segments contained within a lucene index + */ + +#ifndef _LCN_SEGMENTS_H +#define _LCN_SEGMENTS_H + +#include "lcn_types.h" +#include "lcn_directory.h" +#include "lcn_term.h" +#include "lcn_document.h" + +#include <apr_pools.h> +#include <apr_tables.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Opaque structure representing the segments within an index. */ +typedef struct lcn_segments_t lcn_segments_t; + +/** Read in the segments file contained in @a directory and return it in + * @a segments, allocated within @a pool. + */ +lcn_error_t * +lcn_segments_read (lcn_segments_t **segments, + const lcn_directory_t *directory, + apr_pool_t *pool); + +/** Return the version of @a segments. + * + * This can be used to detect if the segment has been modified. + */ +apr_uint64_t lcn_segments_version (const lcn_segments_t *segments); + +/** Return the number of segments within @a segments. */ +unsigned int lcn_segments_count (const lcn_segments_t *segments); + +/** Return the size (in documents) of segment @a name within @a segments. */ +apr_uint32_t lcn_segments_segsize (const lcn_segments_t *segments, + const lcn_char_t *name); + +/** Return the names of the segments within @a segments, allocated in @a pool. + * + * The array holds @c lcn_char_t pointers. + */ +apr_array_header_t * +lcn_segments_names (const lcn_segments_t *segments, apr_pool_t *pool); + +/** Return the max docs contained within @a segments, using @a pool for + * temporary allocation. + */ +apr_uint32_t +lcn_segments_max_docs (const lcn_segments_t *segments, apr_pool_t *pool); + +/** Return the document frequency @a doc_freq, document numbers @a docs, and + * frequencies @a freqs of the documents containing @a term in segment number + * @a segnum within @a segments, using @a pool for allocation. 
+ */ +lcn_error_t * +lcn_segments_term_docs (apr_uint32_t *doc_freq, + apr_uint32_t **docs, + apr_uint32_t **freqs, + lcn_segments_t *segments, + apr_uint32_t segnum, + const lcn_term_t *term, + apr_pool_t *pool); + +/** Return the document @a docnum from @a segs in @a doc, using @a pool for + * allocation. + */ +lcn_error_t * +lcn_segments_get_document (lcn_document_t **doc, + lcn_segments_t *segs, + apr_uint32_t docnum, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif Added: incubator/lucene4c/trunk/include/lcn_term.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_term.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_term.h (added) +++ incubator/lucene4c/trunk/include/lcn_term.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,59 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file lcn_term.h + * @brief Routines for handling lucene terms + */ + +#ifndef _LCN_TERM_H +#define _LCN_TERM_H + +#include "lcn_types.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** A lucene term. */ +typedef struct { + lcn_char_t *contents; /**< The actual contents of the term */ + lcn_char_t *field; /**< The field the term is found in */ +} lcn_term_t; + +/** Create a new term with contents @a contents and field @a field, allocated + * in @a pool. 
+ */ +lcn_term_t * +lcn_term_create (const lcn_char_t *contents, + const lcn_char_t *field, + apr_pool_t *pool); + +/** Like @c lcn_term_create, but with cstring arguments. */ +lcn_term_t * +lcn_term_create_cstring (const char *contents, + const char *field, + apr_pool_t *pool); + +/** Compare terms @a one and @a two, returning an integer less than, equal to, + * or greater than zero if @a one is less than, equal to, or greater than @a two. + */ +int lcn_term_compare (const lcn_term_t *one, const lcn_term_t *two); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _LCN_TERM_H */ Added: incubator/lucene4c/trunk/include/lcn_terminfos.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_terminfos.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_terminfos.h (added) +++ incubator/lucene4c/trunk/include/lcn_terminfos.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,107 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file lcn_terminfos.h + * @brief Routines for manipulating the terminfos file from a lucene segment + */ + +#ifndef _LCN_TERMINFOS_H +#define _LCN_TERMINFOS_H + +#include "lcn_types.h" +#include "lcn_directory.h" +#include "lcn_term.h" + +#include <apr_hash.h> +#include <apr_file_io.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Opaque structure representing the information about a particular term. */ +typedef struct lcn_terminfo_t lcn_terminfo_t; + +/** Opaque structure representing the contents of the terminfos index. */ +typedef struct lcn_terminfos_idx_t lcn_terminfos_idx_t; + +/** Opaque structure representing a segments terminfos file. */ +typedef struct lcn_terminfos_t lcn_terminfos_t; + +/** Return the term @a ti corresponds to. */ +const lcn_term_t *lcn_terminfo_term (const lcn_terminfo_t *ti); + +/** Return the field number @a ti corresponds to. */ +apr_uint32_t lcn_terminfo_field_num (const lcn_terminfo_t *ti); + +/** Return the document frequency for terminfo @a ti. */ +apr_uint32_t lcn_terminfo_doc_freq (const lcn_terminfo_t *ti); + +/** Return the proximity position for terminfo @a ti. */ +apr_uint64_t lcn_terminfo_prox_position (const lcn_terminfo_t *ti); + +/** Return the frequency position for terminfo @a ti. */ +apr_uint64_t lcn_terminfo_freq_position (const lcn_terminfo_t *ti); + +/** Return the skip offset for terminfo @a ti. */ +apr_uint32_t lcn_terminfo_skip_offset (const lcn_terminfo_t *ti); + +/** Return a copy of terminfo @a ti, allocated in @a pool. */ +lcn_terminfo_t * +lcn_terminfo_copy (const lcn_terminfo_t *ti, apr_pool_t *pool); + +/** Read the terminfos index for @a segment in directory @a directory and + * return it in @a tisidx, allocated in @a pool. + */ +lcn_error_t * +lcn_terminfos_idx_read (lcn_terminfos_idx_t **tisidx, + const lcn_directory_t *directory, + const lcn_char_t *segment, + apr_pool_t *pool); + +/** Return the count of the terminfos in @a tisidx. 
*/ +apr_uint64_t +lcn_terminfos_idx_get_count (const lcn_terminfos_idx_t *tisidx); + +/** Return the terminfo @a ti at @a position in @a tisidx. */ +lcn_error_t * +lcn_terminfos_idx_get_terminfo (lcn_terminfo_t **ti, + const lcn_terminfos_idx_t *tisidx, + apr_uint64_t position); + +/** Open the terminfos file for @a segment in @a directory, return it in + * @a tis, allocated in @a pool. + */ +lcn_error_t * +lcn_terminfos_open (lcn_terminfos_t **tis, + const lcn_directory_t *directory, + const lcn_char_t *segment, + apr_pool_t *pool); + +/** Return the terminfo @a ti from @a tis that corresponds to @a term, + * allocated in @a pool. + */ +lcn_error_t * +lcn_terminfos_get_terminfo (lcn_terminfo_t **ti, + const lcn_terminfos_t *tis, + const lcn_term_t *term, + apr_pool_t *pool); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif Added: incubator/lucene4c/trunk/include/lcn_types.h URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/include/lcn_types.h?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/include/lcn_types.h (added) +++ incubator/lucene4c/trunk/include/lcn_types.h Sat Feb 26 08:47:57 2005 @@ -0,0 +1,138 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file lcn_types.h + * @brief Random stuff that doesn't fit anywhere else + */ + +#ifndef _LCN_TYPES_H +#define _LCN_TYPES_H + +#include <apr.h> +#include <apr_pools.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Since we use C89 we need a boolean type... */ +typedef int lcn_boolean_t; + +#ifndef FALSE +/** You figure it out. */ +#define FALSE 0 +#endif + +#ifndef TRUE +/** You figure it out. */ +#define TRUE 1 +#endif + +/** Our exception object. */ +typedef struct lcn_error_t { + apr_status_t apr_err; /**< The underlying APR error */ + + const char *message; /**< Human readable error message */ + + struct lcn_error_t *child; /**< The next error in the chain, or @c NULL */ + + apr_pool_t *pool; /**< The pool this error was allocated in */ + +} lcn_error_t; + +/** The best kind of error. */ +#define LCN_NO_ERROR 0 + +/** Create a new error with underlying error code @a apr_err, wrapping around + * @a child, with error message @a message. + */ +lcn_error_t * +lcn_error_create (apr_status_t apr_err, + lcn_error_t *child, + const char *message); + +/** A @c printf style version of @c lcn_error_create. */ +lcn_error_t * +lcn_error_createf (apr_status_t apr_err, + lcn_error_t *child, + const char *fmt, + ...) +#ifdef DOXYGEN_SHOULD_SKIP_THIS + ; +#else + __attribute__ ((format (printf, 3, 4))); +#endif + +/** Destroy @a error. */ +void lcn_error_clear (lcn_error_t *error); + +/** A handy wrapper for functions that return an @c lcn_error_t, just returns + * the error to our caller if it's not @c LCN_NO_ERROR. + */ +#define LCN_ERR(expr) \ + do { \ + lcn_error_t *lcn_err__temp = (expr); \ + if (lcn_err__temp) \ + return lcn_err__temp; \ + } while (0) + +/** A java style 16 bit char. 
*/ +typedef apr_uint16_t lcn_char_t; + +/** Count the number of characters in a string of lcn_char_t's */ +apr_size_t lcn_strlen (const lcn_char_t *str); + +/** Get the size of a string of lcn_char_t's in bytes */ +#define LCN_STRING_SIZE(str) ((lcn_strlen (str) + 1) * sizeof (lcn_char_t)) + +/** Lexicographically compare @a first and @a second. */ +int lcn_strcmp (const lcn_char_t *first, const lcn_char_t *second); + +/** Return a copy of @a str, allocated in @a pool. */ +lcn_char_t * +lcn_strcpy (const lcn_char_t *str, apr_pool_t *pool); + +/** Convert @a in into a cstring @a out, allocated in @a pool. + * + * This will result in an error if @a in cannot be represented in ASCII + * characters. + */ +lcn_error_t * +lcn_str_to_cstring (char **out, const lcn_char_t *in, apr_pool_t *pool); + +/** Create a string of @c lcn_char_ts that corresponds to the contents of + * @a in, allocated in @a pool. + */ +lcn_char_t * +lcn_str_from_cstring (const char *in, apr_pool_t *pool); + +/** Index into an apr_array_header_t */ +#define APR_ARRAY_IDX(ary,i,type) (((type *)(ary)->elts)[i]) + +/** Easier array-pushing syntax */ +#define APR_ARRAY_PUSH(ary,type) (*((type *)apr_array_push (ary))) + +/** A convenience wrapper for @c apr_pool_create. */ +apr_pool_t *lcn_pool_create (apr_pool_t *parent); + +/** For symmetry with @c lcn_pool_create. 
*/ +#define lcn_pool_destroy apr_pool_destroy + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif Propchange: incubator/lucene4c/trunk/src/cmdline/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Sat Feb 26 08:47:57 2005 @@ -0,0 +1,3 @@ +lcn +.deps +.dirstamp Added: incubator/lucene4c/trunk/src/cmdline/main.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/cmdline/main.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/cmdline/main.c (added) +++ incubator/lucene4c/trunk/src/cmdline/main.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,211 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <apr.h> +#include <apr_pools.h> +#include <apr_file_io.h> /* maybe include this in lcn_index.h */ + +#include "lcn_index.h" +#include "lcn_types.h" +#include "lcn_segments.h" + +/** Signature shared by every subcommand: @a argc and @a argv hold the + * arguments that follow the subcommand name, @a pool is used for allocation. + */ +typedef lcn_error_t *(lcn_subcommand_t) (int argc, + char *argv[], + apr_pool_t *pool); + +lcn_subcommand_t lcn_segments_cmd, lcn_termdocs_cmd; + +/** Maps a subcommand name to the function that implements it. */ +typedef struct { + const char *name; + lcn_subcommand_t *cmd; +} subcommand_desc_t; + +/** Table of known subcommands, terminated by an entry with a NULL name. */ +subcommand_desc_t dispatch_table[] = { + { "segments", lcn_segments_cmd }, + { "termdocs", lcn_termdocs_cmd }, + { NULL } +}; + +/** Print the name of each segment found in the index directory @a argv[0], + * one per line. Returns an error unless exactly one argument is given. + */ +lcn_error_t * +lcn_segments_cmd (int argc, char *argv[], apr_pool_t *pool) +{ + if (argc != 1) + { + printf ("usage: lcn segments <index>\n"); + return lcn_error_create (APR_EGENERAL, NULL, "missing argument"); + } + else + { + apr_array_header_t *names; + lcn_segments_t *segs; + lcn_directory_t *d; + int i; + + LCN_ERR (lcn_fs_directory_open (&d, argv[0], pool)); + + LCN_ERR (lcn_segments_read (&segs, d, pool)); + + names = lcn_segments_names (segs, pool); + + for (i = 0; i < names->nelts; ++i) + { + lcn_char_t *name = APR_ARRAY_IDX (names, i, lcn_char_t *); + char *pname; + + LCN_ERR (lcn_str_to_cstring (&pname, name, pool)); + + printf ("%s\n", pname); + } + + return LCN_NO_ERROR; + } +} + +/** Print the content of the field named @a pfield in document @a doc_num of + * @a idx, or return an APR_EINVAL error if the document has no such field. + * All allocation is done in @a pool. 
+ */ +static lcn_error_t * +print_doc_field (lcn_index_t *idx, + apr_uint32_t doc_num, + lcn_char_t *pfield, + apr_pool_t *pool) +{ + const lcn_field_t *field; + lcn_document_t *doc; + char *cstr; + + LCN_ERR (lcn_index_get_document (&doc, idx, doc_num, pool)); + + field = lcn_document_field_by_name (doc, pfield); + if (field) + { + const lcn_char_t *cont; + + cont = lcn_field_content (field); + + LCN_ERR (lcn_str_to_cstring (&cstr, cont, pool)); + + printf ("%s\n", cstr); + } + else + { + LCN_ERR (lcn_str_to_cstring (&cstr, pfield, pool)); + + return lcn_error_createf (APR_EINVAL, + NULL, + "field '%s' does not exist", + cstr); + } + + return LCN_NO_ERROR; +} + +/** For every document produced by the term-docs iterator for term + * @a argv[1] in field @a argv[2] of the index at @a argv[0], print the + * content of field @a argv[3]. Returns an error unless exactly four + * arguments are given. + */ +lcn_error_t * +lcn_termdocs_cmd (int argc, char 
*argv[], apr_pool_t *pool) +{ + if (argc != 4) + { + printf ("usage: lcn termdocs <index> <term> <field> <pfield>\n"); + /* Report the usage failure like lcn_segments_cmd does, so that main + * exits with EXIT_FAILURE instead of claiming success. */ + return lcn_error_create (APR_EGENERAL, NULL, + "wrong number of arguments"); + } + else + { + lcn_doc_iter_t *itr; + apr_pool_t *subpool; + lcn_char_t *pfield; + lcn_error_t *err; + lcn_index_t *idx; + lcn_term_t t; + + LCN_ERR (lcn_index_open (&idx, argv[0], pool)); + + t.contents = lcn_str_from_cstring (argv[1], pool); + + t.field = lcn_str_from_cstring (argv[2], pool); + + pfield = lcn_str_from_cstring (argv[3], pool); + + LCN_ERR (lcn_index_term_docs (&itr, idx, &t, pool)); + + /* Per-document scratch pool, cleared on every iteration. */ + subpool = lcn_pool_create (pool); + + LCN_ERR (print_doc_field (idx, lcn_doc_iter_doc (itr), pfield, subpool)); + + while ((err = lcn_doc_iter_next (itr)) == LCN_NO_ERROR) + { + apr_pool_clear (subpool); + + LCN_ERR (print_doc_field (idx, + lcn_doc_iter_doc (itr), + pfield, + subpool)); + } + + /* The loop only exits once lcn_doc_iter_next returns an error, so + * release it here rather than leaking it. + * NOTE(review): presumably that error just signals the end of the + * iteration -- confirm, and propagate genuine failures if not. */ + lcn_error_clear (err); + + LCN_ERR (lcn_doc_iter_close (itr)); + + lcn_pool_destroy (subpool); + + return LCN_NO_ERROR; + } +} + +/** Entry point: dispatch @a argv[1] to the matching entry of dispatch_table, + * passing it the remaining arguments. Exits with EXIT_FAILURE on a usage + * error, an unknown command, or a subcommand error. 
+ */ +int +main (int argc, char *argv[]) +{ + apr_pool_t *pool; + int i = 0; + + apr_initialize (); + + atexit (apr_terminate); + + pool = lcn_pool_create (NULL); + + if (argc < 2) + { + printf ("usage: lcn <command> args\n\n"); + printf ("possible values for <command> are:\n"); + + while (dispatch_table[i].name) + { + printf ("\t%s\n", dispatch_table[i].name); + ++i; + } + + return EXIT_FAILURE; + } + else + { + while (dispatch_table[i].name) + { + if (strcmp (argv[1], dispatch_table[i].name) == 0) + { + lcn_error_t *err = dispatch_table[i].cmd (argc - 2, + argv + 2, + pool); + if (err) + { + printf ("err: '%s'\n", err->message); + lcn_error_clear (err); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; + } + + ++i; + } + + /* No entry in the dispatch table matched: say so instead of failing + * silently. */ + printf ("unknown command: '%s'\n", argv[1]); + } + + return EXIT_FAILURE; +} Propchange: incubator/lucene4c/trunk/src/document/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Sat Feb 26 08:47:57 2005 @@ -0,0 +1,2 @@ +.deps +.dirstamp Added: incubator/lucene4c/trunk/src/document/document.c URL: 
http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/document/document.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/document/document.c (added) +++ incubator/lucene4c/trunk/src/document/document.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,72 @@ +/* Copyright 2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lcn_document.h" +#include "apr_tables.h" + +struct lcn_document_t { + apr_array_header_t *fields; +}; + +lcn_document_t * +lcn_document_create (apr_pool_t *pool) +{ + lcn_document_t *doc = apr_pcalloc (pool, sizeof (*doc)); + + doc->fields = apr_array_make (pool, 5, sizeof (lcn_field_t *)); + + return doc; +} + +apr_uint32_t +lcn_document_numfields (lcn_document_t *doc) +{ + return doc->fields->nelts; +} + +void +lcn_document_add_field (lcn_document_t *doc, lcn_field_t *field) +{ + APR_ARRAY_PUSH (doc->fields, lcn_field_t *) = field; +} + +/** Return the field stored at index @a num of @a doc, or NULL when @a num + * is not a valid index (including when @a doc has no fields at all). + */ +const lcn_field_t * +lcn_document_field_by_num (const lcn_document_t *doc, + apr_uint32_t num) +{ + /* nelts is a signed int: the old "nelts - 1" bound became -1 for an + * empty document and converted to the largest unsigned value, so the + * check never fired. Compare against the element count itself. 
*/ + if (num >= (apr_uint32_t) doc->fields->nelts) + return NULL; + + return APR_ARRAY_IDX (doc->fields, num, lcn_field_t *); +} + +const lcn_field_t * +lcn_document_field_by_name (const lcn_document_t *doc, + const lcn_char_t *name) +{ + int idx; + + for (idx = 0; idx < doc->fields->nelts; ++idx) + { + lcn_field_t *f = APR_ARRAY_IDX (doc->fields, idx, lcn_field_t *); + + if (lcn_strcmp (lcn_field_name (f), 
name) == 0) + { + return f; + } + } + + return NULL; +} Added: incubator/lucene4c/trunk/src/document/field.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/document/field.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/document/field.c (added) +++ incubator/lucene4c/trunk/src/document/field.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,52 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "lcn_field.h" + +struct lcn_field_t { + lcn_char_t *name; + lcn_char_t *content; + lcn_field_stored_t stored; + lcn_field_indexed_t indexed; + lcn_field_termvector_t termvector; +}; + +lcn_field_t * +lcn_field_create (const lcn_char_t *name, + const lcn_char_t *value, + lcn_field_stored_t stored, + lcn_field_indexed_t indexed, + lcn_field_termvector_t termvector, + apr_pool_t *pool) +{ + lcn_field_t *f = apr_pcalloc (pool, sizeof (*f)); + + f->name = lcn_strcpy (name, pool); + f->content = lcn_strcpy (value, pool); + + return f; +} + +const lcn_char_t * +lcn_field_name (const lcn_field_t *field) +{ + return field->name; +} + +const lcn_char_t * +lcn_field_content (const lcn_field_t *field) +{ + return field->content; +} Propchange: incubator/lucene4c/trunk/src/index/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Sat Feb 26 08:47:57 2005 @@ -0,0 +1,2 @@ +.deps +.dirstamp Added: incubator/lucene4c/trunk/src/index/fielddata.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/fielddata.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/fielddata.c (added) +++ incubator/lucene4c/trunk/src/index/fielddata.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,154 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "lcn_fielddata.h" +#include "lcn_fieldinfos.h" +#include "lcn_istream.h" + +#include <apr_strings.h> +#include <apr_pools.h> + +#define FIELD_IS_TOKENIZED 0x1 +#define FIELD_IS_BINARY 0x2 +#define FIELD_IS_COMPRESSED 0x4 + +struct lcn_fielddata_t { + lcn_fieldinfos_t *fis; + lcn_istream_t *idx; + lcn_istream_t *data; +}; + +lcn_error_t * +lcn_fielddata_open (lcn_fielddata_t **fd, + const lcn_directory_t *directory, + const lcn_char_t *segment, + apr_pool_t *pool) +{ + lcn_fielddata_t *fdata = apr_pcalloc (pool, sizeof (*fdata)); + char *s; + + LCN_ERR (lcn_str_to_cstring (&s, segment, pool)); + + LCN_ERR (lcn_directory_open_file (&fdata->idx, + directory, + apr_psprintf (pool, "%s.fdx", s), + pool)); + + LCN_ERR (lcn_directory_open_file (&fdata->data, + directory, + apr_psprintf (pool, "%s.fdt", s), + pool)); + + LCN_ERR (lcn_fieldinfos_read (&fdata->fis, directory, segment, pool)); + + *fd = fdata; + + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_fielddata_get_doc (apr_array_header_t **pdoc, + lcn_fielddata_t *fd, + apr_uint32_t doc_num, + apr_pool_t *pool) +{ + apr_array_header_t *doc; + apr_uint32_t i, nfields; + apr_uint64_t position; + + /* the docs entry in the index is doc_num * 8 bytes in... 
*/ + LCN_ERR (lcn_istream_seek (fd->idx, doc_num * 8L, pool)); + + LCN_ERR (lcn_istream_read_long (fd->idx, &position, pool)); + + LCN_ERR (lcn_istream_seek (fd->data, position, pool)); + + LCN_ERR (lcn_istream_read_vint (fd->data, &nfields, pool)); + + doc = apr_array_make (pool, nfields, sizeof (lcn_field_t *)); + + for (i = 0; i < nfields; ++i) + { + lcn_boolean_t compressed, tokenized, binary; + apr_uint32_t field_num; + lcn_field_t *field; + char bits; + + LCN_ERR (lcn_istream_read_vint (fd->data, &field_num, pool)); + + LCN_ERR (lcn_istream_read_byte (fd->data, &bits, pool)); + + compressed = (bits & FIELD_IS_COMPRESSED) != 0; + tokenized = (bits & FIELD_IS_TOKENIZED) != 0; + binary = (bits & FIELD_IS_BINARY) != 0; + + if (compressed) + return lcn_error_create (APR_ENOTIMPL, + NULL, + "compressed fields are not yet supported"); + if (binary) + return lcn_error_create (APR_ENOTIMPL, + NULL, + "binary fields are not yet supported"); + + { + lcn_field_termvector_t termvector; + lcn_field_indexed_t indexed; + lcn_field_stored_t stored; + + lcn_char_t *str; + + lcn_fieldinfo_t *fi = lcn_fieldinfos_get_by_number (fd->fis, + field_num); + + if (! fi) + return lcn_error_createf (APR_EINVAL, + NULL, + "couldn't find info for field number '%d'", + field_num); + + /* XXX need info on positions and offsets here */ + termvector = fi->store_term_vector ? LCN_TERMVECTOR_YES + : LCN_TERMVECTOR_NO; + + if (fi->is_indexed) + { + indexed = tokenized ? LCN_INDEXED_TOKENIZED + : LCN_INDEXED_UNTOKENIZED; + } + else + { + indexed = LCN_INDEXED_NO; + } + + stored = compressed ? 
LCN_STORED_COMPRESS : LCN_STORED_YES; + + LCN_ERR (lcn_istream_read_string (fd->data, &str, pool)); + + field = lcn_field_create (fi->name, + str, + stored, + indexed, + termvector, + pool); + + APR_ARRAY_PUSH(doc, lcn_field_t *) = field; + } + } + + *pdoc = doc; + + return LCN_NO_ERROR; +} Added: incubator/lucene4c/trunk/src/index/fieldinfos.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/fieldinfos.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/fieldinfos.c (added) +++ incubator/lucene4c/trunk/src/index/fieldinfos.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,105 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "lcn_fieldinfos.h" +#include "lcn_istream.h" + +#include <apr_strings.h> +#include <apr_hash.h> + +struct lcn_fieldinfos_t { + apr_array_header_t *by_number; + apr_hash_t *by_name; +}; + +lcn_error_t * +lcn_fieldinfos_read (lcn_fieldinfos_t **fis, + const lcn_directory_t *directory, + const lcn_char_t *segment, + apr_pool_t *pool) +{ + apr_uint32_t i, field_count; + lcn_istream_t *istream; + apr_pool_t *subpool; + char *s; + + LCN_ERR (lcn_str_to_cstring (&s, segment, pool)); + + *fis = apr_pcalloc (pool, sizeof (**fis)); + + (*fis)->by_number = apr_array_make (pool, 10, sizeof (lcn_fieldinfo_t *)); + (*fis)->by_name = apr_hash_make (pool); + + subpool = lcn_pool_create (pool); + + LCN_ERR (lcn_directory_open_file (&istream, + directory, + apr_psprintf (subpool, "%s.fnm", s), + subpool)); + + LCN_ERR (lcn_istream_read_vint (istream, &field_count, subpool)); + + for (i = 0; i < field_count; ++i) + { + lcn_fieldinfo_t *fi; + lcn_char_t *field_name; + char field_bits; + + LCN_ERR (lcn_istream_read_string (istream, &field_name, pool)); + + LCN_ERR (lcn_istream_read_byte (istream, &field_bits, subpool)); + + fi = apr_pcalloc (pool, sizeof (*fi)); + + fi->name = field_name; + fi->is_indexed = (field_bits & 0x01) != 0; + fi->number = i; + fi->store_term_vector = (field_bits & 0x02) != 0; + + apr_hash_set ((*fis)->by_name, + field_name, + LCN_STRING_SIZE (field_name), + fi); + + APR_ARRAY_PUSH ((*fis)->by_number, lcn_fieldinfo_t *) = fi; + } + + lcn_pool_destroy (subpool); + + return LCN_NO_ERROR; +} + +lcn_fieldinfo_t * +lcn_fieldinfos_get_by_name (const lcn_fieldinfos_t *fis, + const lcn_char_t *field) +{ + return apr_hash_get (fis->by_name, field, LCN_STRING_SIZE (field)); +} + +lcn_fieldinfo_t * +lcn_fieldinfos_get_by_number (const lcn_fieldinfos_t *fis, + int num) +{ + if (num < 0 || num > fis->by_number->nelts) + return NULL; + + return APR_ARRAY_IDX (fis->by_number, num, lcn_fieldinfo_t *); +} + +int +lcn_fieldinfos_get_count (const lcn_fieldinfos_t 
*fis) +{ + return fis->by_number->nelts; +} Added: incubator/lucene4c/trunk/src/index/frequencies.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/frequencies.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/frequencies.c (added) +++ incubator/lucene4c/trunk/src/index/frequencies.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,99 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "lcn_frequencies.h" +#include "lcn_istream.h" + +#include <apr_strings.h> + +struct lcn_frequencies_t { + lcn_istream_t *frq; +}; + +lcn_error_t * +lcn_frequencies_open (lcn_frequencies_t **freqs, + const lcn_directory_t *directory, + const lcn_char_t *segment, + apr_pool_t *pool) +{ + char *s; + + *freqs = apr_pcalloc (pool, sizeof (lcn_frequencies_t)); + + LCN_ERR (lcn_str_to_cstring (&s, segment, pool)); + + LCN_ERR (lcn_directory_open_file (&(*freqs)->frq, + directory, + apr_psprintf (pool, "%s.frq", s), + pool)); + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_frequencies_get_terminfo_data (apr_uint32_t **docs, + apr_uint32_t **freqs, + lcn_frequencies_t *lf, + const lcn_terminfo_t *ti, + apr_pool_t *pool) +{ + apr_uint32_t doc_freq = lcn_terminfo_doc_freq (ti); + + apr_uint32_t *d, *f; + + apr_pool_t *subpool = lcn_pool_create (pool); + + LCN_ERR (lcn_istream_seek (lf->frq, + lcn_terminfo_freq_position (ti), + subpool)); + + d = apr_pcalloc (pool, sizeof (apr_uint32_t) * doc_freq); + f = apr_pcalloc (pool, sizeof (apr_uint32_t) * doc_freq); + + { + apr_uint32_t count = 0, freq = 0, doc = 0, i = 0; + + while (i < doc_freq && count < doc_freq) + { + apr_uint32_t doc_code; + + apr_pool_clear (subpool); + + LCN_ERR (lcn_istream_read_vint (lf->frq, &doc_code, subpool)); + + doc += doc_code >> 1; /* shift off the low bit, but java uses >>> */ + + if ((doc_code & 1) != 0) + freq = 1; + else + { + LCN_ERR (lcn_istream_read_vint (lf->frq, &freq, subpool)); + } + + ++count; + + if (1) /* XXX check for deleted docs */ + { + d[i] = doc; + f[i] = freq; + ++i; + } + } + } + + *docs = d; + *freqs = f; + + return LCN_NO_ERROR; +} Added: incubator/lucene4c/trunk/src/index/index.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/index.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/index.c (added) +++ incubator/lucene4c/trunk/src/index/index.c Sat 
Feb 26 08:47:57 2005 @@ -0,0 +1,156 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lcn_index.h" +#include "lcn_segments.h" + +#include <apr_file_io.h> +#include <apr_strings.h> + +struct lcn_index_t { + lcn_directory_t *directory; + + lcn_segments_t *segments; +}; + +lcn_error_t * +lcn_index_open (lcn_index_t **i, + const char *directory, + apr_pool_t *pool) +{ + lcn_index_t *idx = apr_pcalloc (pool, sizeof (*idx)); + + LCN_ERR (lcn_fs_directory_open (&idx->directory, directory, pool)); + + LCN_ERR (lcn_segments_read (&idx->segments, idx->directory, pool)); + + *i = idx; + + return LCN_NO_ERROR; +} + +apr_uint32_t +lcn_index_max_docs (lcn_index_t *idx, apr_pool_t *pool) +{ + return lcn_segments_max_docs (idx->segments, pool); +} + +struct lcn_doc_iter_t { + lcn_index_t *index; + + lcn_term_t *term; + + /* which segment are we currently going through? */ + apr_uint32_t cur_seg; + + apr_uint32_t num_segs; + + /* where are we in our current segment? */ + apr_uint32_t idx_in_seg; + + /* how many docs did we match in the current seg? 
*/ + apr_uint32_t count_in_seg; + + apr_uint32_t *seg_docnums; + apr_uint32_t *seg_freqnums; + + /* this pool is used ONLY to allocate seg_docnums and seg_freqnums */ + apr_pool_t *temppool; +}; + +apr_uint32_t +lcn_doc_iter_doc (lcn_doc_iter_t *itr) +{ + return itr->seg_docnums[itr->idx_in_seg]; +} + +apr_uint32_t +lcn_doc_iter_freq (lcn_doc_iter_t *itr) +{ + return itr->seg_freqnums[itr->idx_in_seg]; +} + +lcn_error_t * +lcn_doc_iter_next (lcn_doc_iter_t *itr) +{ + if (++itr->idx_in_seg == itr->count_in_seg) + { + if (++itr->cur_seg == itr->num_segs) + return lcn_error_create (APR_EOF, NULL, "iteration finished"); + + apr_pool_clear (itr->temppool); + + itr->idx_in_seg = 0; + + LCN_ERR (lcn_segments_term_docs (&itr->count_in_seg, + &itr->seg_docnums, + &itr->seg_freqnums, + itr->index->segments, + itr->cur_seg, + itr->term, + itr->temppool)); + } + + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_doc_iter_close (lcn_doc_iter_t *itr) +{ + lcn_pool_destroy (itr->temppool); + + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_index_term_docs (lcn_doc_iter_t **i, + lcn_index_t *idx, + lcn_term_t *term, + apr_pool_t *pool) +{ + lcn_doc_iter_t *itr = apr_palloc (pool, sizeof (*itr)); + + itr->index = idx; + + itr->term = lcn_term_create (term->contents, term->field, pool); + + itr->cur_seg = 0; + itr->num_segs = lcn_segments_count (idx->segments); + + itr->idx_in_seg = 0; + + itr->temppool = lcn_pool_create (pool); + + LCN_ERR (lcn_segments_term_docs (&itr->count_in_seg, + &itr->seg_docnums, + &itr->seg_freqnums, + idx->segments, + itr->cur_seg, + itr->term, + itr->temppool)); + + *i = itr; + + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_index_get_document (lcn_document_t **doc, + lcn_index_t *idx, + apr_uint32_t docnum, + apr_pool_t *pool) +{ + return lcn_segments_get_document (doc, idx->segments, docnum, pool); +} Added: incubator/lucene4c/trunk/src/index/segment.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/segment.c?view=auto&rev=155552 
============================================================================== --- incubator/lucene4c/trunk/src/index/segment.c (added) +++ incubator/lucene4c/trunk/src/index/segment.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,160 @@ +/* Copyright 2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lcn_segment.h" +#include "lcn_terminfos.h" +#include "lcn_fieldinfos.h" +#include "lcn_fielddata.h" +#include "lcn_frequencies.h" + +#include <apr_strings.h> + +#include <assert.h> + +struct lcn_segment_t { + lcn_char_t *name; + + apr_uint32_t size; + + const lcn_directory_t *directory; + + lcn_terminfos_t *term_infos; + + lcn_fielddata_t *field_data; + + lcn_frequencies_t *frequencies; +}; + +lcn_error_t * +lcn_segment_open (lcn_segment_t **segment, + const lcn_char_t *segname, + apr_uint32_t size, + const lcn_directory_t *directory, + apr_pool_t *pool) +{ + lcn_segment_t *s = apr_pcalloc (pool, sizeof (*s)); + char *cfsname, *cfsfilename; + lcn_boolean_t cfs_exists; + + s->name = lcn_strcpy (segname, pool); + s->size = size; + + LCN_ERR (lcn_str_to_cstring (&cfsname, segname, pool)); + + cfsfilename = apr_psprintf (pool, "%s.cfs", cfsname); + + LCN_ERR (lcn_directory_file_exists (&cfs_exists, + directory, + cfsfilename, + pool)); + if (cfs_exists) + { + lcn_istream_t *cfsstream; + lcn_directory_t *d; + + LCN_ERR (lcn_directory_open_file (&cfsstream, + directory, + cfsfilename, + pool)); + + LCN_ERR (lcn_cfs_directory_open (&d, 
cfsstream, pool)); + + LCN_ERR (lcn_istream_close (cfsstream, pool)); + + s->directory = d; + } + else + { + s->directory = directory; + } + + LCN_ERR (lcn_terminfos_open (&s->term_infos, s->directory, segname, pool)); + + LCN_ERR (lcn_fielddata_open (&s->field_data, s->directory, segname, pool)); + + LCN_ERR (lcn_frequencies_open (&s->frequencies, + s->directory, + segname, + pool)); + + *segment = s; + + return LCN_NO_ERROR; +} + +const lcn_char_t * +lcn_segment_name (const lcn_segment_t *seg) +{ + return seg->name; +} + +apr_uint32_t +lcn_segment_size (const lcn_segment_t *seg) +{ + return seg->size; +} + +const lcn_directory_t * +lcn_segment_directory (const lcn_segment_t *seg) +{ + return seg->directory; +} + +lcn_error_t * +lcn_segment_term_docs (apr_uint32_t *doc_freq, + apr_uint32_t **docs, + apr_uint32_t **freqs, + lcn_segment_t *segment, + const lcn_term_t *term, + apr_pool_t *pool) +{ + lcn_terminfo_t *ti; + + LCN_ERR (lcn_terminfos_get_terminfo (&ti, segment->term_infos, term, pool)); + + LCN_ERR (lcn_frequencies_get_terminfo_data (docs, + freqs, + segment->frequencies, + ti, + pool)); + + *doc_freq = lcn_terminfo_doc_freq (ti); + + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_segment_get_document (lcn_document_t **doc, + lcn_segment_t *segment, + apr_uint32_t docnum, + apr_pool_t *pool) +{ + apr_array_header_t *fields; + lcn_document_t *d; + int i; + + LCN_ERR (lcn_fielddata_get_doc (&fields, segment->field_data, docnum, pool)); + + d = lcn_document_create (pool); + + for (i = 0; i < fields->nelts; ++i) + { + lcn_document_add_field (d, APR_ARRAY_IDX (fields, i, lcn_field_t *)); + } + + *doc = d; + + return LCN_NO_ERROR; +} Added: incubator/lucene4c/trunk/src/index/segments.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/segments.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/segments.c (added) +++ 
incubator/lucene4c/trunk/src/index/segments.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,203 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lcn_segments.h" +#include "lcn_segment.h" +#include "lcn_istream.h" + +#include <apr_file_io.h> +#include <apr_strings.h> +#include <apr_hash.h> + +#define SEGMENTS_VERSION ((apr_uint32_t) -1) + +struct lcn_segments_t { + apr_uint32_t format; + apr_uint64_t version; + + apr_hash_t *segments; /* name (lcn_char_t *) -> segment (lcn_segment_t *) */ + + apr_array_header_t *segments_bynum; +}; + +lcn_error_t * +lcn_segments_read (lcn_segments_t **segments, + const lcn_directory_t *directory, + apr_pool_t *pool) +{ + apr_pool_t *subpool = lcn_pool_create (pool); + lcn_istream_t *istream; + apr_uint32_t segcount; + int i; + + LCN_ERR (lcn_directory_open_file (&istream, directory, "segments", subpool)); + + *segments = apr_pcalloc (pool, sizeof (**segments)); + + LCN_ERR (lcn_istream_read_int (istream, &((*segments)->format), subpool)); + + if ((*segments)->format != SEGMENTS_VERSION) + return lcn_error_create (APR_EINVAL, NULL, "bogus segments format"); + + LCN_ERR (lcn_istream_read_long (istream, &((*segments)->version), subpool)); + + /* XXX this is reading in the 'counter', which doesn't seem to be documented + in the format web page... should ask on the lucene mailing list... 
*/ + LCN_ERR (lcn_istream_read_int (istream, &segcount, subpool)); + + LCN_ERR (lcn_istream_read_int (istream, &segcount, subpool)); + + (*segments)->segments = apr_hash_make (pool); + + (*segments)->segments_bynum = apr_array_make (pool, + segcount, + sizeof (lcn_segment_t *)); + + for (i = 0; i < segcount; ++i) + { + lcn_segment_t *segment; + apr_uint32_t size; + lcn_char_t *name; + + /* the string is dynamically allocated, so we use pool so it persists */ + LCN_ERR (lcn_istream_read_string (istream, &name, pool)); + + LCN_ERR (lcn_istream_read_int (istream, &size, subpool)); + + LCN_ERR (lcn_segment_open (&segment, name, size, directory, pool)); + + apr_hash_set ((*segments)->segments, + name, + LCN_STRING_SIZE (name), + segment); + + APR_ARRAY_PUSH ((*segments)->segments_bynum, lcn_segment_t *) = segment; + } + + lcn_pool_destroy (subpool); + + return LCN_NO_ERROR; +} + +apr_uint64_t +lcn_segments_version (const lcn_segments_t *segments) +{ + return segments->version; +} + +unsigned int +lcn_segments_count (const lcn_segments_t *segments) +{ + return apr_hash_count (segments->segments); +} + +apr_uint32_t +lcn_segments_segsize (const lcn_segments_t *segments, const lcn_char_t *name) +{ + lcn_segment_t *seg = apr_hash_get (segments->segments, + name, + LCN_STRING_SIZE (name)); + if (seg) + return lcn_segment_size (seg); + else + return -1; +} + +apr_array_header_t * +lcn_segments_names (const lcn_segments_t *segments, apr_pool_t *pool) +{ + apr_array_header_t *rv = apr_array_make (pool, + lcn_segments_count (segments), + sizeof (lcn_char_t *)); + + apr_hash_index_t *hi; + + for (hi = apr_hash_first (pool, segments->segments); + hi; + hi = apr_hash_next (hi)) + { + const void *key; + apr_size_t size; + + apr_hash_this (hi, &key, &size, NULL); + + APR_ARRAY_PUSH (rv, lcn_char_t *) = apr_pmemdup (pool, key, size); + } + + return rv; +} + +apr_uint32_t +lcn_segments_max_docs (const lcn_segments_t *segments, apr_pool_t *pool) +{ + apr_uint32_t rv = 0; + 
apr_hash_index_t *hi; + + for (hi = apr_hash_first (pool, segments->segments); + hi; + hi = apr_hash_next (hi)) + { + void *val; + + apr_hash_this (hi, NULL, NULL, &val); + + rv += lcn_segment_size (val); + } + + return rv; +} + +lcn_error_t * +lcn_segments_term_docs (apr_uint32_t *doc_freq, + apr_uint32_t **docs, + apr_uint32_t **freqs, + lcn_segments_t *segments, + apr_uint32_t segnum, + const lcn_term_t *term, + apr_pool_t *pool) +{ + lcn_segment_t *seg; + + if (segnum > segments->segments_bynum->nelts) + return lcn_error_create (APR_EINVAL, NULL, "invalid segment number"); + + seg = APR_ARRAY_IDX (segments->segments_bynum, segnum, lcn_segment_t *); + + LCN_ERR (lcn_segment_term_docs (doc_freq, docs, freqs, seg, term, pool)); + + return LCN_NO_ERROR; +} + +lcn_error_t * +lcn_segments_get_document (lcn_document_t **doc, + lcn_segments_t *segments, + apr_uint32_t docnum, + apr_pool_t *pool) +{ + lcn_segment_t *seg; + + /* XXX find appropriate segment for docnum */ + if (docnum > lcn_segment_size (APR_ARRAY_IDX (segments->segments_bynum, + 0, + lcn_segment_t *))) + return lcn_error_create + (APR_ENOTIMPL, + NULL, + "return documents in segment > 0 not yet implemented"); + else + seg = APR_ARRAY_IDX (segments->segments_bynum, 0, lcn_segment_t *); + + return lcn_segment_get_document (doc, seg, docnum, pool); +} Added: incubator/lucene4c/trunk/src/index/term.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/term.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/term.c (added) +++ incubator/lucene4c/trunk/src/index/term.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,74 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lcn_term.h" + +lcn_term_t * +lcn_term_create (const lcn_char_t *contents, + const lcn_char_t *field, + apr_pool_t *pool) +{ + lcn_term_t *rv = apr_pcalloc (pool, sizeof (*rv)); + + rv->contents = lcn_strcpy (contents, pool); + rv->field = lcn_strcpy (field, pool); + + return rv; +} + +lcn_term_t * +lcn_term_create_cstring (const char *contents, + const char *field, + apr_pool_t *pool) +{ + lcn_term_t *rv = apr_pcalloc (pool, sizeof (*rv)); + apr_size_t i, len; + + rv->contents = apr_pcalloc (pool, + (strlen (contents) + 1) * sizeof (lcn_char_t)); + + len = strlen (contents); + + for (i = 0; i < len; ++i) + { + rv->contents[i] = contents[i]; + } + + rv->field = apr_pcalloc (pool, (strlen (field) + 1) * sizeof (lcn_char_t)); + + len = strlen (field); + + for (i = 0; i < len; ++i) + { + rv->field[i] = field[i]; + } + + return rv; +} + +int +lcn_term_compare (const lcn_term_t *one, const lcn_term_t *two) +{ + int tmp = lcn_strcmp (one->field, two->field); + + if (tmp != 0) + { + return tmp; + } + else + { + return lcn_strcmp (one->contents, two->contents); + } +} Added: incubator/lucene4c/trunk/src/index/terminfos.c URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/index/terminfos.c?view=auto&rev=155552 ============================================================================== --- incubator/lucene4c/trunk/src/index/terminfos.c (added) +++ incubator/lucene4c/trunk/src/index/terminfos.c Sat Feb 26 08:47:57 2005 @@ -0,0 +1,448 @@ +/* Copyright 2004-2005 Garrett Rooney + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lcn_terminfos.h" +#include "lcn_fieldinfos.h" +#include "lcn_istream.h" +#include "lcn_term.h" + +#include <apr_strings.h> + +#define TERMINFOS_VERSION ((apr_uint32_t) -2) + +struct lcn_terminfo_t { + /* the actual word... */ + lcn_term_t *term; + + /* what field is this? */ + apr_uint32_t field_num; + + /* # of documents that contain the term */ + apr_uint32_t doc_freq; + + /* offset of freq data in .frq file */ + apr_uint64_t freq_position; + + /* offset of positions in .prx file */ + apr_uint64_t prox_position; + + /* offset of skip data from freq data in .frq file */ + apr_uint32_t skip_offset; +}; + +struct lcn_terminfos_idx_t { + apr_uint32_t version; /* XXX undocumented */ + + apr_uint64_t count; /* XXX documented as a uint32 */ + + apr_uint32_t index_interval; /* XXX undocumented */ + + apr_uint32_t skip_interval; /* XXX undocumented */ + + lcn_terminfo_t **terminfos; + + lcn_fieldinfos_t *fis; + + apr_uint64_t *offsets; +}; + +struct lcn_terminfos_t { + apr_uint32_t version; + + lcn_terminfos_idx_t *idx; + + lcn_istream_t *tis; + + apr_uint64_t term_count; +}; + +/* read a terminfo object from ISTREAM, placing the results in *TI. + * + * TISIDX is the index for the terminfos file we're reading from. + * + * if PREV is non-null then use it when filling in the terminfo we're + * reading, otherwise assume we're reading the first one in the file. 
+ * + * TMPPOOL is used for temporary allocations, but the final terminfo + * returned in *TI is allocated in POOL. */ +static lcn_error_t * +read_terminfo (lcn_terminfo_t **ti, + lcn_terminfos_idx_t *tisidx, + lcn_terminfo_t *prev, + lcn_istream_t *istream, + apr_pool_t *tmppool, + apr_pool_t *pool) +{ + *ti = apr_pcalloc (pool, sizeof (**ti)); + + { + apr_uint32_t start, length, total_length; + lcn_char_t *t; + + LCN_ERR (lcn_istream_read_vint (istream, &start, tmppool)); + + /* the beginning of the string... */ + LCN_ERR (lcn_istream_read_vint (istream, &length, tmppool)); + + total_length = start + length; + + t = apr_pcalloc (pool, (total_length + 1) * sizeof (lcn_char_t)); + + if (prev) + { + /* pull in the prefix, which is made of the first start bytes of + * the previous term. + * + * XXX this strlen is slow. should cache len somewhere. */ + if (lcn_strlen (prev->term->contents) >= start) + memcpy (t, prev->term->contents, start * sizeof (lcn_char_t)); + else + return lcn_error_create (APR_EINVAL, + NULL, + "tried to copy more bytes than exist"); + } + + { + lcn_char_t *tmp; + + LCN_ERR (lcn_istream_read_chars (istream, + &tmp, + length, + tmppool)); + + memcpy (t + start, tmp, length * sizeof (lcn_char_t)); + } + + LCN_ERR (lcn_istream_read_vint (istream, &(*ti)->field_num, tmppool)); + + { + lcn_term_t *new_term = apr_pcalloc (pool, sizeof (*new_term)); + + lcn_fieldinfo_t *fi = lcn_fieldinfos_get_by_number (tisidx->fis, + (*ti)->field_num); + + new_term->contents = t; + + if (fi) + { + new_term->field = lcn_strcpy (fi->name, pool); + } + else + { + static const lcn_char_t empty[] = { 0 }; + new_term->field = lcn_strcpy (empty, pool); + } + + (*ti)->term = new_term; + } + + LCN_ERR (lcn_istream_read_vint (istream, &(*ti)->doc_freq, tmppool)); + + LCN_ERR (lcn_istream_read_vlong (istream, &(*ti)->freq_position, tmppool)); + + if (prev) + { + (*ti)->freq_position += prev->freq_position; + } + + LCN_ERR (lcn_istream_read_vlong (istream, 
&(*ti)->prox_position, tmppool));
+
+      if (prev)
+        {
+          (*ti)->prox_position += prev->prox_position;
+        }
+    }
+
+  if ((*ti)->doc_freq >= tisidx->skip_interval)
+    {
+      LCN_ERR (lcn_istream_read_vint (istream, &(*ti)->skip_offset, tmppool));
+    }
+  else if (prev)
+    {
+      (*ti)->skip_offset = prev->skip_offset;
+    }
+
+  return LCN_NO_ERROR;
+}
+
+/* Return the term stored in TI. */
+const lcn_term_t *
+lcn_terminfo_term (const lcn_terminfo_t *ti)
+{
+  return ti->term;
+}
+
+/* Return the field number stored in TI. */
+apr_uint32_t
+lcn_terminfo_field_num (const lcn_terminfo_t *ti)
+{
+  return ti->field_num;
+}
+
+/* Return the document frequency stored in TI. */
+apr_uint32_t
+lcn_terminfo_doc_freq (const lcn_terminfo_t *ti)
+{
+  return ti->doc_freq;
+}
+
+/* Return the prox position stored in TI. */
+apr_uint64_t
+lcn_terminfo_prox_position (const lcn_terminfo_t *ti)
+{
+  return ti->prox_position;
+}
+
+/* Return the freq position stored in TI. */
+apr_uint64_t
+lcn_terminfo_freq_position (const lcn_terminfo_t *ti)
+{
+  return ti->freq_position;
+}
+
+/* Return the skip offset stored in TI. */
+apr_uint32_t
+lcn_terminfo_skip_offset (const lcn_terminfo_t *ti)
+{
+  return ti->skip_offset;
+}
+
+/* Return a copy of TI allocated from POOL.
+ *
+ * All fields are copied bitwise, then the term pointer is replaced by a
+ * new term built with lcn_term_create from the original term's contents
+ * and field, so the copy's term object itself belongs to POOL. */
+lcn_terminfo_t *
+lcn_terminfo_copy (const lcn_terminfo_t *ti, apr_pool_t *pool)
+{
+  lcn_terminfo_t *rv = apr_pcalloc (pool, sizeof (*rv));
+
+  memcpy (rv, ti, sizeof (*rv));
+
+  rv->term = lcn_term_create (ti->term->contents, ti->term->field, pool);
+
+  return rv;
+}
+
+/* Read the term info index for SEGMENT in DIRECTORY into *TISIDX, which
+ * is allocated from POOL.
+ *
+ * The field infos for the segment are read first, then the file
+ * "<segment>.tii" is opened and parsed: a version (which must equal
+ * TERMINFOS_VERSION, otherwise an APR_EINVAL error is returned), the
+ * entry count, the index interval and the skip interval, followed by
+ * one terminfo plus one variable length offset per entry.  Each offset
+ * is stored as a delta from the previous one and is accumulated here
+ * into an absolute value.
+ *
+ * A subpool of POOL is used for per-entry scratch allocations. */
+lcn_error_t *
+lcn_terminfos_idx_read (lcn_terminfos_idx_t **tisidx,
+                        const lcn_directory_t *directory,
+                        const lcn_char_t *segment,
+                        apr_pool_t *pool)
+{
+  apr_pool_t *subpool = lcn_pool_create (pool);
+  lcn_istream_t *istream;
+  apr_uint64_t i;
+  char *s;
+
+  LCN_ERR (lcn_str_to_cstring (&s, segment, pool));
+
+  *tisidx = apr_pcalloc (pool, sizeof (**tisidx));
+
+  LCN_ERR (lcn_fieldinfos_read (&(*tisidx)->fis, directory, segment, pool));
+
+  LCN_ERR (lcn_directory_open_file (&istream,
+                                    directory,
+                                    apr_psprintf (pool, "%s.tii", s),
+                                    pool));
+
+  LCN_ERR (lcn_istream_read_int (istream, &(*tisidx)->version, subpool));
+
+  if ((*tisidx)->version != TERMINFOS_VERSION)
+    {
+      /* nothing more will be read, so release the scratch pool first */
+      lcn_pool_destroy (subpool);
+
+      return lcn_error_create (APR_EINVAL,
+                               NULL,
+                               "unsupported terminfos version");
+    }
+
+  LCN_ERR (lcn_istream_read_long (istream, &(*tisidx)->count, subpool));
+
+  LCN_ERR (lcn_istream_read_int (istream,
+                                 &(*tisidx)->index_interval,
+                                 subpool));
+
+  LCN_ERR (lcn_istream_read_int (istream, &(*tisidx)->skip_interval, subpool));
+
+  (*tisidx)->terminfos
+    = apr_pcalloc (pool, sizeof (lcn_terminfo_t *) * (*tisidx)->count);
+
+  (*tisidx)->offsets
+    = apr_pcalloc (pool, sizeof (apr_uint64_t) * (*tisidx)->count);
+
+  for (i = 0; i < (*tisidx)->count; ++i)
+    {
+      apr_uint64_t offset;
+      lcn_terminfo_t *ti;
+
+      /* each entry is decoded relative to the one before it */
+      LCN_ERR (read_terminfo (&ti,
+                              *tisidx,
+                              i != 0 ? (*tisidx)->terminfos[i - 1] : NULL,
+                              istream,
+                              subpool,
+                              pool));
+
+      LCN_ERR (lcn_istream_read_vlong (istream, &offset, subpool));
+
+      (*tisidx)->terminfos[i] = ti;
+
+      (*tisidx)->offsets[i] = offset;
+
+      /* offsets are stored as deltas, turn them into absolute values */
+      if (i > 0)
+        {
+          (*tisidx)->offsets[i] += (*tisidx)->offsets[i - 1];
+        }
+
+      apr_pool_clear (subpool);
+    }
+
+  lcn_pool_destroy (subpool);
+
+  return LCN_NO_ERROR;
+}
+
+/* Return the number of entries in TISIDX. */
+apr_uint64_t
+lcn_terminfos_idx_get_count (const lcn_terminfos_idx_t *tisidx)
+{
+  return tisidx->count;
+}
+
+/* Set *TI to the entry at POSITION in TISIDX.
+ *
+ * Valid positions run from 0 to count - 1; anything else yields an
+ * APR_EINVAL error and leaves *TI untouched.  The returned terminfo is
+ * the index's own entry, not a copy. */
+lcn_error_t *
+lcn_terminfos_idx_get_terminfo (lcn_terminfo_t **ti,
+                                const lcn_terminfos_idx_t *tisidx,
+                                apr_uint64_t position)
+{
+  /* position == count is already one past the last entry */
+  if (position >= tisidx->count)
+    {
+      return lcn_error_create (APR_EINVAL,
+                               NULL,
+                               "tried to get terminfo that doesn't exist");
+    }
+  else
+    {
+      *ti = tisidx->terminfos[position];
+
+      return LCN_NO_ERROR;
+    }
+}
+
+/* Open the term infos for SEGMENT in DIRECTORY, returning them in *TIS,
+ * allocated from POOL.
+ *
+ * This reads the term info index via lcn_terminfos_idx_read and then
+ * opens the file "<segment>.tis", keeping the resulting stream in *TIS. */
+lcn_error_t *
+lcn_terminfos_open (lcn_terminfos_t **tis,
+                    const lcn_directory_t *directory,
+                    const lcn_char_t *segment,
+                    apr_pool_t *pool)
+{
+  char *s;
+
+  *tis = apr_pcalloc (pool, sizeof (lcn_terminfos_t));
+
+  LCN_ERR (lcn_str_to_cstring (&s, segment, pool));
+
+  LCN_ERR (lcn_terminfos_idx_read (&(*tis)->idx,
+                                   directory,
+                                   segment,
+                                   pool));
+
+  LCN_ERR (lcn_directory_open_file (&(*tis)->tis,
+                                    directory,
+                                    apr_psprintf (pool, "%s.tis", s),
+                                    pool));
+  return LCN_NO_ERROR;
+}
+
+static int
+get_index_offset (lcn_terminfos_idx_t *idx, const lcn_term_t *term)
+{
+  int high = idx->count - 1;
+  int low = 0;
+
+  while (high >= low)
+    {
+      int mid = (low + high) >> 1;
+      int delta = lcn_term_compare (term, idx->terminfos[mid]->term);
+      if (delta < 0)
+        high = mid - 1;
+      else if (delta > 0)
+        low = mid + 1;
+      else
+        return mid;
+    }
+
+  return high;
+}
+
+/* Look up TERM in TIS, returning a copy of its terminfo in *TI, allocated
+ * from POOL.
+ *
+ * The index is searched first; if TERM is one of the index entries a copy
+ * of that entry is returned.  Otherwise the tis stream is positioned at
+ * the offset recorded for the closest preceding index entry and terminfos
+ * are read one after another until TERM is found or a term that sorts
+ * after it is seen.
+ *
+ * If TERM is not present *TI is set to NULL and an APR_ENOENT error is
+ * returned.  Errors from seeking or reading are returned as is. */
+lcn_error_t *
+lcn_terminfos_get_terminfo (lcn_terminfo_t **ti,
+                            const lcn_terminfos_t *tis,
+                            const lcn_term_t *term,
+                            apr_pool_t *pool)
+{
+  apr_pool_t *oldpool, *newpool;
+  lcn_terminfo_t *tmp, *prev;
+
+  int i = get_index_offset (tis->idx, term);
+
+  /* get_index_offset returns -1 when the index has no entries or when
+   * term sorts before the first index entry.  there is no index entry
+   * to start scanning from in that case, so the term can't be found,
+   * and using -1 as an array index below would read out of bounds. */
+  if (i < 0)
+    {
+      *ti = NULL;
+
+      return lcn_error_create (APR_ENOENT, NULL, "term not found");
+    }
+
+  /* did we find it right in the index? */
+  if (lcn_term_compare (term, tis->idx->terminfos[i]->term) == 0)
+    {
+      *ti = lcn_terminfo_copy (tis->idx->terminfos[i], pool);
+      return LCN_NO_ERROR;
+    }
+
+  /* seek to the offset in the tis file... */
+  LCN_ERR (lcn_istream_seek (tis->tis, tis->idx->offsets[i], pool));
+
+  /* we use two temp pools since we need to keep each terminfo we
+   * get around so we can use it for the next read_terminfo call.
+   *
+   * we don't bother with an explicit temp pool because it makes
+   * little sense, instead we just pass one pool to read_terminfo
+   * as both the temp and regular pool.  the final return value is
+   * duplicated into the main pool before returning. */
+  oldpool = lcn_pool_create (pool);
+  newpool = lcn_pool_create (pool);
+
+  /* ok, we already have a terminfo for that term from the index,
+   * so use it to allow us to calculate the following terminfos. */
+  prev = tis->idx->terminfos[i];
+
+  /* now loop, reading terminfos until we hit the one we want */
+  for (;;)
+    {
+      int cmp;
+
+      LCN_ERR (read_terminfo (&tmp,
+                              tis->idx,
+                              prev,
+                              tis->tis,
+                              newpool,
+                              newpool));
+
+      cmp = lcn_term_compare (term, tmp->term);
+
+      if (cmp == 0)
+        {
+          /* found it, copy it out before the temp pools go away */
+          *ti = lcn_terminfo_copy (tmp, pool);
+
+          lcn_pool_destroy (newpool);
+          lcn_pool_destroy (oldpool);
+
+          return LCN_NO_ERROR;
+        }
+      else if (cmp > 0)
+        {
+          apr_pool_t *tmppool;
+
+          /* not there yet.  tmp lives in newpool and becomes prev, the
+           * terminfo before it is no longer needed, so clear its pool
+           * and swap the two for the next read. */
+          prev = tmp;
+
+          apr_pool_clear (oldpool);
+
+          tmppool = oldpool;
+          oldpool = newpool;
+          newpool = tmppool;
+        }
+      else
+        {
+          /* we read a term that sorts after the one we want, so it
+           * isn't in the file. */
+          lcn_pool_destroy (newpool);
+          lcn_pool_destroy (oldpool);
+
+          *ti = NULL;
+
+          return lcn_error_create (APR_ENOENT, NULL, "term not found");
+        }
+    }
+}

Propchange: incubator/lucene4c/trunk/src/search/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sat Feb 26 08:47:57 2005
@@ -0,0 +1,2 @@
+.deps
+.dirstamp

Added: incubator/lucene4c/trunk/src/search/query.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/search/query.c?view=auto&rev=155552
==============================================================================
--- incubator/lucene4c/trunk/src/search/query.c (added)
+++ incubator/lucene4c/trunk/src/search/query.c Sat Feb 26 08:47:57 2005
@@ -0,0 +1,148 @@
+/* Copyright 2004-2005 Garrett Rooney
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lcn_query.h"
+
+/* The kinds of query this file knows how to build. */
+typedef enum {
+  TERM_QUERY,
+  BOOLEAN_QUERY
+} query_type_t;
+
+/* Per-type callback that builds the scorer for a query. */
+typedef lcn_error_t * (*query_scorer_internal_t) (lcn_scorer_t **scorer,
+                                                  lcn_query_t *query,
+                                                  lcn_index_t *index,
+                                                  apr_pool_t *pool);
+
+struct lcn_query_t {
+  /* which kind of query this is */
+  query_type_t type;
+
+  /* builds a scorer for this kind of query */
+  query_scorer_internal_t scorer_internal;
+
+  /* type specific data: the term for a term query, a
+   * boolean_query_baton_t for a boolean query */
+  void *baton;
+};
+
+/* Scorer callback for term queries: fetch the doc iterator for the
+ * query's term from INDEX and wrap it in a term scorer. */
+static lcn_error_t *
+term_scorer_internal (lcn_scorer_t **scorer,
+                      lcn_query_t *query,
+                      lcn_index_t *index,
+                      apr_pool_t *pool)
+{
+  lcn_doc_iter_t *docs;
+  lcn_term_t *wanted = query->baton;
+
+  LCN_ERR (lcn_index_term_docs (&docs, index, wanted, pool));
+
+  LCN_ERR (lcn_term_scorer_create (scorer, docs, pool));
+
+  return LCN_NO_ERROR;
+}
+
+/* Create a query for the single term T in *Q, allocated from POOL.  The
+ * query keeps the pointer T itself as its baton. */
+lcn_error_t *
+lcn_term_query_create (lcn_query_t **q,
+                       lcn_term_t *t,
+                       apr_pool_t *pool)
+{
+  lcn_query_t *created = apr_pcalloc (pool, sizeof (*created));
+
+  created->baton = t;
+  created->scorer_internal = term_scorer_internal;
+  created->type = TERM_QUERY;
+
+  *q = created;
+
+  return LCN_NO_ERROR;
+}
+
+/* Baton for boolean queries, one array of lcn_query_t * per kind of
+ * clause. */
+typedef struct {
+  apr_array_header_t *must;
+  apr_array_header_t *should;
+  apr_array_header_t *must_not;
+} boolean_query_baton_t;
+
+/* Scorer callback for boolean queries: hand the three clause arrays to
+ * lcn_boolean_scorer_create. */
+static lcn_error_t *
+boolean_scorer_internal (lcn_scorer_t **scorer,
+                         lcn_query_t *query,
+                         lcn_index_t *index,
+                         apr_pool_t *pool)
+{
+  boolean_query_baton_t *clauses = query->baton;
+
+  LCN_ERR (lcn_boolean_scorer_create (scorer,
+                                      clauses->must,
+                                      clauses->should,
+                                      clauses->must_not,
+                                      index,
+                                      pool));
+
+  return LCN_NO_ERROR;
+}
+
+/* Create an empty boolean query in *Q, allocated from POOL. */
+lcn_error_t *
+lcn_boolean_query_create (lcn_query_t **q, apr_pool_t *pool)
+{
+  boolean_query_baton_t *clauses = apr_pcalloc (pool, sizeof (*clauses));
+  lcn_query_t *created;
+
+  clauses->must = apr_array_make (pool, 5, sizeof (lcn_query_t *));
+  clauses->should = apr_array_make (pool, 5, sizeof (lcn_query_t *));
+  clauses->must_not = apr_array_make (pool, 5, sizeof (lcn_query_t *));
+
+  created = apr_pcalloc (pool, sizeof (*created));
+
+  created->baton = clauses;
+  created->scorer_internal = boolean_scorer_internal;
+  created->type = BOOLEAN_QUERY;
+
+  *q = created;
+
+  return LCN_NO_ERROR;
+}
+
+/* Add CLAUSE to the boolean query QUERY, in the list selected by OCCUR.
+ *
+ * Returns an APR_EINVAL error if QUERY is not a boolean query or if
+ * OCCUR is not one of LCN_MUST, LCN_SHOULD or LCN_MUST_NOT; nothing is
+ * added in either case. */
+lcn_error_t *
+lcn_boolean_query_add (lcn_query_t *query,
+                       lcn_query_t *clause,
+                       lcn_boolean_clause_occur_t occur)
+{
+  boolean_query_baton_t *clauses;
+  apr_array_header_t *target;
+
+  if (query->type != BOOLEAN_QUERY)
+    return lcn_error_create (APR_EINVAL,
+                             NULL,
+                             "can only add clauses to a boolean query");
+
+  clauses = query->baton;
+
+  /* pick the list first, there is only one push below */
+  switch (occur)
+    {
+    case LCN_MUST:
+      target = clauses->must;
+      break;
+
+    case LCN_SHOULD:
+      target = clauses->should;
+      break;
+
+    case LCN_MUST_NOT:
+      target = clauses->must_not;
+      break;
+
+    default:
+      return lcn_error_create (APR_EINVAL, NULL, "invalid occur argument");
+    }
+
+  APR_ARRAY_PUSH (target, lcn_query_t *) = clause;
+
+  return LCN_NO_ERROR;
+}
+
+/* Create a scorer for QUERY over INDEX in *SCORER, allocated from POOL,
+ * by calling the query's own scorer callback. */
+lcn_error_t *
+lcn_query_scorer (lcn_scorer_t **scorer,
+                  lcn_query_t *query,
+                  lcn_index_t *index,
+                  apr_pool_t *pool)
+{
+  return query->scorer_internal (scorer, query, index, pool);
+}
