Adds the configuration files and installer settings needed to offer DOM indexing of bibliographic records as an option when installing from source.
Signed-off-by: Galen Charlton <[email protected]> --- Makefile.PL | 22 +++++++++++ etc/koha-conf.xml | 11 ++++- etc/zebradb/biblios/etc/dom-config-marc.xml | 54 ++++++++++++++++++++++++++ etc/zebradb/biblios/etc/dom-config.xml | 54 ++++++++++++++++++++++++++ etc/zebradb/retrieval-info-bib-dom.xml | 22 +++++++++++ etc/zebradb/retrieval-info-bib-grs1.xml | 40 +++++++++++++++++++ etc/zebradb/zebra-biblios-dom.cfg | 55 +++++++++++++++++++++++++++ misc/koha-install-log | 4 +- rewrite-config.PL | 3 + 9 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 etc/zebradb/biblios/etc/dom-config-marc.xml create mode 100644 etc/zebradb/biblios/etc/dom-config.xml create mode 100644 etc/zebradb/retrieval-info-bib-dom.xml create mode 100644 etc/zebradb/retrieval-info-bib-grs1.xml create mode 100644 etc/zebradb/zebra-biblios-dom.cfg diff --git a/Makefile.PL b/Makefile.PL index 2e414d5..c3644f8 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -408,6 +408,7 @@ my %config_defaults = ( 'INSTALL_SRU' => 'yes', 'INSTALL_PAZPAR2' => 'no', 'AUTH_INDEX_MODE' => 'dom', + 'BIB_INDEX_MODE' => 'dom', 'ZEBRA_MARC_FORMAT' => 'marc21', 'ZEBRA_LANGUAGE' => 'en', 'ZEBRA_TOKENIZER' => 'chr', @@ -460,6 +461,7 @@ my %valid_config_values = ( 'INSTALL_ZEBRA' => { 'yes' => 1, 'no' => 1 }, 'INSTALL_SRU' => { 'yes' => 1, 'no' => 1 }, 'AUTH_INDEX_MODE' => { 'grs1' => 1, 'dom' => 1 }, + 'BIB_INDEX_MODE' => { 'grs1' => 1, 'dom' => 1 }, 'ZEBRA_MARC_FORMAT' => { 'marc21' => 1, 'normarc' => 1, 'unimarc' => 1 }, # FIXME should generate from contents of distributation 'ZEBRA_LANGUAGE' => { 'en' => 1, 'fr' => 1, 'nb' => 1 }, # FIXME should generate from contents of distribution 'ZEBRA_TOKENIZER' => { chr => 1, icu => 1 }, @@ -501,12 +503,15 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") { push @{ $pl_files->{'rewrite-config.PL'} }, ( 'blib/ZEBRA_CONF_DIR/etc/passwd', 'blib/ZEBRA_CONF_DIR/zebra-biblios.cfg', + 'blib/ZEBRA_CONF_DIR/zebra-biblios-dom.cfg', 'blib/ZEBRA_CONF_DIR/zebra-authorities.cfg', 
'blib/ZEBRA_CONF_DIR/zebra-authorities-dom.cfg', 'blib/ZEBRA_CONF_DIR/explain-authorities.xml', 'blib/ZEBRA_CONF_DIR/explain-biblios.xml', 'blib/ZEBRA_CONF_DIR/retrieval-info-auth-grs1.xml', 'blib/ZEBRA_CONF_DIR/retrieval-info-auth-dom.xml', + 'blib/ZEBRA_CONF_DIR/retrieval-info-bib-grs1.xml', + 'blib/ZEBRA_CONF_DIR/retrieval-info-bib-dom.xml', ); push @{ $pl_files->{'rewrite-config.PL'} }, ( 'blib/SCRIPT_DIR/koha-zebra-ctl.sh', @@ -520,8 +525,11 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") { ); } $config{'ZEBRA_AUTH_CFG'} = $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'zebra-authorities-dom.cfg' : 'zebra-authorities.cfg'; + $config{'ZEBRA_BIB_CFG'} = $config{'BIB_INDEX_MODE'} eq 'dom' ? 'zebra-biblios-dom.cfg' : 'zebra-biblios.cfg'; $config{'AUTH_RETRIEVAL_CFG'} = $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'retrieval-info-auth-dom.xml' : 'retrieval-info-auth-grs1.xml'; + $config{'BIB_RETRIEVAL_CFG'} = + $config{'BIB_INDEX_MODE'} eq 'dom' ? 'retrieval-info-bib-dom.xml' : 'retrieval-info-bib-grs1.xml'; } if ($config{'INSTALL_MODE'} ne "dev") { @@ -942,6 +950,20 @@ Primary language for Zebra indexing); $msg = q( Koha can use one of two different indexing modes +for the MARC bibliographic records: + +grs1 - uses the Zebra GRS-1 filter, available + for legacy support +dom - uses the DOM XML filter; offers improved + functionality. 
+ +Bibliographic indexing mode); + $msg .= _add_valid_values_disp('BIB_INDEX_MODE', $valid_values); + $config{'BIB_INDEX_MODE'} = _get_value('BIB_INDEX_MODE', $msg, $defaults->{'BIB_INDEX_MODE'}, $valid_values, $install_log_values); + + + $msg = q( +Koha can use one of two different indexing modes for the MARC authorities records: grs1 - uses the Zebra GRS-1 filter, available diff --git a/etc/koha-conf.xml b/etc/koha-conf.xml index f5e2c0f..2cb4e52 100644 --- a/etc/koha-conf.xml +++ b/etc/koha-conf.xml @@ -27,7 +27,7 @@ __PAZPAR2_TOGGLE_XML_PRE__ <listen id="mergeserver">tcp:@:__MERGE_SERVER_PORT__</listen> <server id="mergeserver" listenref="mergeserver"> <directory>__ZEBRA_DATA_DIR__/biblios</directory> - <config>__ZEBRA_CONF_DIR__/zebra-biblios.cfg</config> + <config>__ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__</config> <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn> </server> __PAZPAR2_TOGGLE_XML_POST__ @@ -35,7 +35,7 @@ __PAZPAR2_TOGGLE_XML_POST__ <!-- BIBLIOGRAPHIC RECORDS --> <server id="biblioserver" listenref="biblioserver"> <directory>__ZEBRA_DATA_DIR__/biblios</directory> - <config>__ZEBRA_CONF_DIR__/zebra-biblios.cfg</config> + <config>__ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__</config> <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn> <!-- <docpath>xsl</docpath> --> <!-- <stylesheet>xsl/default.xsl</stylesheet> --> @@ -182,8 +182,11 @@ __PAZPAR2_TOGGLE_XML_POST__ <!-- <server id="publicserver" listenref="publicserver"> <directory>__ZEBRA_DATA_DIR__/biblios</directory> - <config>__ZEBRA_CONF_DIR__/zebra-biblios.cfg</config> + <config>__ZEBRA_CONF_DIR__/__ZEBRA_BIB_CFG__</config> <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn> + <xi:include href="__KOHA_CONF_DIR__/zebradb/__BIB_RETRIEVAL_CFG__" + xmlns:xi="http://www.w3.org/2001/XInclude"> + <xi:fallback> <retrievalinfo> <retrieval syntax="usmarc" name="F"/> <retrieval syntax="usmarc" name="B"/> @@ -232,6 +235,8 @@ __PAZPAR2_TOGGLE_XML_POST__ </backend> </retrieval> </retrievalinfo> + </xi:fallback> + 
</xi:include> <xi:include href="__KOHA_CONF_DIR__/zebradb/explain-biblios.xml" xmlns:xi="http://www.w3.org/2001/XInclude"> <xi:fallback> diff --git a/etc/zebradb/biblios/etc/dom-config-marc.xml b/etc/zebradb/biblios/etc/dom-config-marc.xml new file mode 100644 index 0000000..b53c68e --- /dev/null +++ b/etc/zebradb/biblios/etc/dom-config-marc.xml @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +$Id: dom-config.xml,v 1.1 2007-12-13 17:42:28 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, +MA 02110-1301 USA. 
+--> + +<!-- +Bibliographic DOM config for MARC ISO2709 input +--> +<dom> + <extract name="index"> + <xslt stylesheet="biblio-zebra-indexdefs.xsl"/> + </extract> + <retrieve name="F"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="usmarc"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="marc"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="marcxml"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="zebra"> + <xslt stylesheet="zebra.xsl"/> + </retrieve> + <retrieve name="index"> + <xslt stylesheet="biblio-zebra-indexdefs.xsl"/> + </retrieve> + <input> + <marc inputcharset="utf-8"/> + </input> +</dom> diff --git a/etc/zebradb/biblios/etc/dom-config.xml b/etc/zebradb/biblios/etc/dom-config.xml new file mode 100644 index 0000000..8b3b858 --- /dev/null +++ b/etc/zebradb/biblios/etc/dom-config.xml @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +$Id: dom-config.xml,v 1.1 2007-12-13 17:42:28 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, +MA 02110-1301 USA. 
+--> + +<!-- +Bibliographic DOM config for MARC XML input +--> +<dom> + <extract name="index"> + <xslt stylesheet="biblio-zebra-indexdefs.xsl"/> + </extract> + <retrieve name="F"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="usmarc"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="marc"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="marcxml"> + <xslt stylesheet="identity.xsl" /> + </retrieve> + <retrieve name="zebra"> + <xslt stylesheet="zebra.xsl"/> + </retrieve> + <retrieve name="index"> + <xslt stylesheet="biblio-zebra-indexdefs.xsl"/> + </retrieve> + <input> + <xmlreader level="0"/> + </input> +</dom> diff --git a/etc/zebradb/retrieval-info-bib-dom.xml b/etc/zebradb/retrieval-info-bib-dom.xml new file mode 100644 index 0000000..3093c72 --- /dev/null +++ b/etc/zebradb/retrieval-info-bib-dom.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<retrievalinfo xmlns="http://indexdata.com/yaz"> + <retrieval syntax="usmarc" name="F"> + <backend syntax="xml" name="marc"> + <marc inputformat="xml" outputformat="marc" + inputcharset="utf-8" + outputcharset="utf-8"/> + </backend> + </retrieval> + <retrieval syntax="usmarc" name="B"> + <backend syntax="xml" name="marc"> + <marc inputformat="xml" outputformat="marc" + inputcharset="utf-8" + outputcharset="utf-8"/> + </backend> + </retrieval> + <retrieval syntax="xml" name="index"/> <!-- allow viewing index entries --> + <retrieval syntax="xml" name="marc" + identifier="info:srw/schema/1/marcxml-v1.1"/> + <retrieval syntax="xml" name="marcxml" + identifier="info:srw/schema/1/marcxml-v1.1"/> +</retrievalinfo> diff --git a/etc/zebradb/retrieval-info-bib-grs1.xml b/etc/zebradb/retrieval-info-bib-grs1.xml new file mode 100644 index 0000000..58f319d --- /dev/null +++ b/etc/zebradb/retrieval-info-bib-grs1.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> +<retrievalinfo xmlns="http://indexdata.com/yaz"> + <retrieval syntax="usmarc" name="F"/> 
+ <retrieval syntax="usmarc" name="B"/> + <retrieval syntax="xml" name="marcxml" + identifier="info:srw/schema/1/marcxml-v1.1"> + <backend syntax="usmarc" name="F"> + <marc inputformat="marc" outputformat="marcxml" + inputcharset="utf-8"/> + </backend> + </retrieval> + <retrieval syntax="xml" name="dc"> + <backend syntax="usmarc" name="F"> + <marc inputformat="marc" outputformat="marcxml" + inputcharset="utf-8"/> + <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2DC.xsl"/> + </backend> + </retrieval> + <retrieval syntax="xml" name="mods"> + <backend syntax="usmarc" name="F"> + <marc inputformat="marc" outputformat="marcxml" + inputcharset="utf-8"/> + <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2MODS.xsl"/> + </backend> + </retrieval> + <retrieval syntax="xml" name="rdfdc"> + <backend syntax="usmarc" name="F"> + <marc inputformat="marc" outputformat="marcxml" + inputcharset="utf-8"/> + <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2RDFDC.xsl"/> + </backend> + </retrieval> + <retrieval syntax="xml" name="utils"> + <backend syntax="usmarc" name="F"> + <marc inputformat="marc" outputformat="marcxml" + inputcharset="utf-8"/> + <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slimUtils.xsl"/> + </backend> + </retrieval> +</retrievalinfo> diff --git a/etc/zebradb/zebra-biblios-dom.cfg b/etc/zebradb/zebra-biblios-dom.cfg new file mode 100644 index 0000000..a748ddd --- /dev/null +++ b/etc/zebradb/zebra-biblios-dom.cfg @@ -0,0 +1,55 @@ +# Simple Zebra configuration file that defines +# a database with MARCXML records. +# $Id: zebra.cfg,v 1.1.2.2 2006/05/09 12:03:16 rangi Exp $ +# +# Where are the config files located? 
+profilePath:__ZEBRA_CONF_DIR__/biblios/etc:__ZEBRA_CONF_DIR__/etc:__ZEBRA_CONF_DIR__/marc_defs/__ZEBRA_MARC_FORMAT__/biblios:__ZEBRA_CONF_DIR__/lang_defs/__ZEBRA_LANGUAGE__:__ZEBRA_CONF_DIR__/xsl +# modulePath - where to look for loadable zebra modules +modulePath: /usr/lib/idzebra-2.0/modules + +encoding: UTF-8 +# Files that describe the attribute sets supported. +attset: bib1.att +attset: explain.att +attset: gils.att + +# systag sysno rank + +# Specify record type +# group .recordType[ .name ]: type +# type is split into fundamental type. file-read-type . argument +# http://www.indexdata.dk/zebra/doc/zebra-cfg.tkl +# http://www.indexdata.dk/zebra/doc/grs.tkl + +recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config.xml +marcxml.recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config.xml +iso2709.recordtype: dom.__ZEBRA_CONF_DIR__/biblios/etc/dom-config-marc.xml + +recordId: (bib1,Local-number) +storeKeys:1 +storeData:1 + + +# Lock File Area +lockDir: __ZEBRA_LOCK_DIR__/biblios +perm.anonymous:ar +perm.__ZEBRA_USER__:rw +passwd: __ZEBRA_CONF_DIR__/etc/passwd +register: __ZEBRA_DATA_DIR__/biblios/register:20G +shadow: __ZEBRA_DATA_DIR__/biblios/shadow:20G + +# Temp File area for result sets +setTmpDir: __ZEBRA_DATA_DIR__/biblios/tmp + +# Temp File area for index program +keyTmpDir: __ZEBRA_DATA_DIR__/biblios/key + +# Approx. Memory usage during indexing +memMax: 50M +rank:rank-1 +truncmax: 1000000000 + +# Specifies the maximum number of records that will be sorted in a result set. +# If the result set contains more than that limit, the records after the limit +# will not be sorted. If omitted, the default value is 1,000. 
+sortmax: 1000 diff --git a/misc/koha-install-log b/misc/koha-install-log index 6f6a0eb..d842484 100644 --- a/misc/koha-install-log +++ b/misc/koha-install-log @@ -44,7 +44,9 @@ ZEBRA_RUN_DIR=__ZEBRA_RUN_DIR__ ZEBRA_MARC_FORMAT=__ZEBRA_MARC_FORMAT__ ZEBRA_LANGUAGE=__ZEBRA_LANGUAGE__ ZEBRA_AUTH_CFG=__ZEBRA_AUTH_CFG__ +ZEBRA_BIB_CFG=__ZEBRA_BIB_CFG__ AUTH_RETRIEVAL_CFG=__AUTH_RETRIEVAL_CFG__ +BIB_RETRIEVAL_CFG=__BIB_RETRIEVAL_CFG__ MERGE_SERVER_HOST=__MERGE_SERVER_HOST__ MERGE_SERVER_PORT=__MERGE_SERVER_PORT__ PAZPAR2_HOST=__PAZPAR2_HOST__ @@ -54,7 +56,7 @@ INSTALL_BASE=__INSTALL_BASE__ INSTALL_ZEBRA=__INSTALL_ZEBRA__ INSTALL_SRU=__INSTALL_SRU__ INSTALL_PAZPAR2=__INSTALL_PAZPAR2__ -AUTH_INDEX_MODE=__AUTH_INDEX_MODE__ +BIB_INDEX_MODE=__BIB_INDEX_MODE__ RUN_DATABASE_TESTS=__RUN_DATABASE_TESTS__ PATH_TO_ZEBRA=__PATH_TO_ZEBRA__ USE_MEMCACHED=__USE_MEMCACHED__ diff --git a/rewrite-config.PL b/rewrite-config.PL index 967a546..083b3a8 100644 --- a/rewrite-config.PL +++ b/rewrite-config.PL @@ -122,7 +122,9 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr"; '__ZEBRA_LANGUAGE__' => 'en', '__ZEBRA_TOKENIZER_STMT__' => 'charmap word-phrase-utf.chr', '__ZEBRA_AUTH_CFG__' => 'zebra-authorities.cfg', + '__ZEBRA_BIB_CFG__' => 'zebra-biblios.cfg', '__AUTH_RETRIEVAL_CFG__' => 'retrieval-info-auth-grs1.xml', + '__BIB_RETRIEVAL_CFG__' => 'retrieval-info-bib-grs1.xml', "__MERGE_SERVER_HOST__" => $myhost, "__MERGE_SERVER_PORT__" => '11001', "__PAZPAR2_HOST__" => $myhost, @@ -135,6 +137,7 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr"; "__PAZPAR2_TOGGLE_XML_PRE__" => '<!--', "__PAZPAR2_TOGGLE_XML_POST__" => '-->', "__AUTH_INDEX_MODE__" => 'grs1', + "__BIB_INDEX_MODE__" => 'grs1', "__RUN_DATABASE_TESTS__" => 'no', "__PATH_TO_ZEBRA__" => "", "__USE_MEMCACHED__" => 'no', -- 1.7.2.5 _______________________________________________ Koha-patches mailing list [email protected] http://lists.koha-community.org/cgi-bin/mailman/listinfo/koha-patches website : http://www.koha-community.org/ git : 
http://git.koha-community.org/ bugs : http://bugs.koha-community.org/
