This is an automated email from the ASF dual-hosted git repository.

reschke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 8c02534da5 OAK-10694: Remove oak-search-mt (#1410)
8c02534da5 is described below

commit 8c02534da5c988f406008acacfe67c15a4467ad2
Author: Julian Reschke <resc...@apache.org>
AuthorDate: Tue Apr 9 16:10:11 2024 +0200

    OAK-10694: Remove oak-search-mt (#1410)
---
 oak-search-mt/pom.xml                              | 160 ---------------------
 .../index/mt/MTFulltextQueryTermsProvider.java     | 144 -------------------
 .../mt/MTFulltextQueryTermsProviderFactory.java    | 144 -------------------
 .../index/mt/MTFulltextQueryTermsProviderTest.java |  64 ---------
 pom.xml                                            |   1 -
 5 files changed, 513 deletions(-)

diff --git a/oak-search-mt/pom.xml b/oak-search-mt/pom.xml
deleted file mode 100644
index e32873ad6d..0000000000
--- a/oak-search-mt/pom.xml
+++ /dev/null
@@ -1,160 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-  -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd ">
-    <modelVersion>4.0.0</modelVersion>
-
-    <parent>
-        <groupId>org.apache.jackrabbit</groupId>
-        <artifactId>oak-parent</artifactId>
-        <version>1.63-SNAPSHOT</version>
-        <relativePath>../oak-parent/pom.xml</relativePath>
-    </parent>
-
-    <artifactId>oak-search-mt</artifactId>
-    <name>Oak Search Machine Translation</name>
-    <packaging>bundle</packaging>
-    <description>Machine Translation extension for Oak search</description>
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.felix</groupId>
-                <artifactId>maven-bundle-plugin</artifactId>
-                <extensions>true</extensions>
-                <configuration>
-                    <instructions>
-                        <Export-Package>
-                            !*
-                        </Export-Package>
-                        <Embed-Dependency>*;scope=compile,artifactId=!oak-lucene</Embed-Dependency>
-                        <Import-Package>
-                            com.ibm.uvm.tools.*;resolution:=optional,
-                            com.sun.jdmk.comm.*;resolution:=optional,
-                            com.sun.net.httpserver.*;resolution:=optional,
-                            edu.uci.ics.*;resolution:=optional,
-                            javax.jms.*;resolution:=optional,
-                            javax.jmdns.*;resolution:=optional,
-                            junit.framework.*;resolution:=optional,
-                            org.apache.commons.collections15.*;resolution:=optional,
-                            org.apache.tools.ant.*;resolution:=optional,
-                            org.apache.tools.ant.types.*;resolution:=optional,
-                            org.easymock.*;resolution:=optional,
-                            org.jmock.core.*;resolution:=optional,
-                            sun.misc.*;resolution:=optional,
-                            EDU.oswego.cs.dl.util.concurrent.*;resolution:=optional,
-                            org.kohsuke.args4j.*;resolution:=optional,
-                            *
-                        </Import-Package>
-                    </instructions>
-                </configuration>
-                <executions>
-                    <execution>
-                        <id>baseline</id>
-                        <goals>
-                            <goal>baseline</goal>
-                        </goals>
-                        <phase>pre-integration-test</phase>
-                    </execution>
-                </executions>
-            </plugin>
-            <plugin>
-                <groupId>org.apache.rat</groupId>
-                <artifactId>apache-rat-plugin</artifactId>
-            </plugin>
-        </plugins>
-    </build>
-
-    <dependencies>
-        <!-- Optional OSGi dependencies, used only when running within OSGi -->
-        <dependency>
-            <groupId>org.osgi</groupId>
-            <artifactId>org.osgi.service.component.annotations</artifactId>
-            <scope>provided</scope>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.jackrabbit</groupId>
-            <artifactId>oak-lucene</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.joshua</groupId>
-            <artifactId>joshua-incubating</artifactId>
-            <version>6.1</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.slf4j</groupId>
-                    <artifactId>slf4j-log4j12</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-queryparser</artifactId>
-            <version>${lucene.version}</version>
-            <scope>provided</scope>
-        </dependency>
-
-        <!-- Nullability annotations -->
-        <dependency>
-            <groupId>org.jetbrains</groupId>
-            <artifactId>annotations</artifactId>
-            <scope>provided</scope>
-        </dependency>
-
-        <!-- Test Dependencies -->
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.mockito</groupId>
-            <artifactId>mockito-core</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>colt</groupId>
-            <artifactId>colt</artifactId>
-            <version>1.2.0</version>
-        </dependency>
-        <dependency>
-            <groupId>edu.berkeley.nlp</groupId>
-            <artifactId>berkeleylm</artifactId>
-            <version>1.1.2</version>
-        </dependency>
-        <dependency>
-            <groupId>commons-cli</groupId>
-            <artifactId>commons-cli</artifactId>
-            <version>1.2</version>
-        </dependency>
-        <dependency>
-            <groupId>commons-io</groupId>
-            <artifactId>commons-io</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-math3</artifactId>
-        </dependency>
-
-    </dependencies>
-</project>
-
diff --git a/oak-search-mt/src/main/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProvider.java b/oak-search-mt/src/main/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProvider.java
deleted file mode 100644
index 6d99147233..0000000000
--- a/oak-search-mt/src/main/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProvider.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.jackrabbit.oak.plugins.index.mt;
-
-import java.io.StringReader;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.jackrabbit.oak.plugins.index.lucene.OakAnalyzer;
-import 
org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider;
-import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
-import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.StructuredTranslation;
-import org.apache.joshua.decoder.Translation;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.simple.SimpleQueryParser;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.util.Version;
-import org.jetbrains.annotations.NotNull;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * {@link FulltextQueryTermsProvider} that performs machine translation on full text returning a query containing
- * translated tokens.
- * @deprecated see OAK-10694
- */
-@Deprecated(forRemoval = true)
-public class MTFulltextQueryTermsProvider implements 
FulltextQueryTermsProvider {
-
-    private final Logger log = LoggerFactory.getLogger(getClass());
-
-    private final Decoder decoder;
-    private final Set<String> nodeTypes;
-    private final float minScore;
-    private final SimpleQueryParser qp;
-
-    public MTFulltextQueryTermsProvider(Decoder decoder, Set<String> 
nodeTypes, float minScore) {
-        this.decoder = decoder;
-        this.nodeTypes = nodeTypes;
-        this.minScore = minScore;
-        this.qp = new SimpleQueryParser(new OakAnalyzer(Version.LUCENE_47), 
FieldNames.FULLTEXT);
-    }
-
-    @Override
-    public Query getQueryTerm(String text, Analyzer analyzer, NodeState 
indexDefinition) {
-
-        BooleanQuery query = new BooleanQuery();
-        try {
-            Sentence sentence = new Sentence(text, text.hashCode(), 
decoder.getJoshuaConfiguration());
-            Translation translation = decoder.decode(sentence);
-            log.debug("{} decoded into {}", text, translation);
-            query.add(new BooleanClause(new TermQuery(new 
Term(FieldNames.FULLTEXT, translation.toString())), 
BooleanClause.Occur.SHOULD));
-
-
-            // try phrase translation first
-            List<StructuredTranslation> structuredTranslations = 
translation.getStructuredTranslations();
-            log.debug("found {} structured translations", 
structuredTranslations.size());
-            if (!structuredTranslations.isEmpty()) {
-                log.debug("phrase translation");
-                addTranslations(query, structuredTranslations);
-            } else {
-                // if phrase cannot be translated, perform token by token translation
-                log.debug("per token translation");
-
-                TokenStream tokenStream = analyzer.tokenStream(null, new 
StringReader(text));
-                tokenStream.addAttribute(CharTermAttribute.class);
-                tokenStream.reset();
-                while (tokenStream.incrementToken()) {
-                    CharTermAttribute attribute = 
tokenStream.getAttribute(CharTermAttribute.class);
-                    String source = attribute.toString();
-                    Translation translatedToken = decoder.decode(new 
Sentence(source, source.hashCode(),
-                            decoder.getJoshuaConfiguration()));
-                    addTranslations(query, 
translatedToken.getStructuredTranslations());
-                }
-                tokenStream.end();
-            }
-
-        } catch (Exception e) {
-            log.error("could not translate query", e);
-        }
-        return query.clauses().size() > 0 ? query : null;
-    }
-
-    private void addTranslations(BooleanQuery query, 
List<StructuredTranslation> structuredTranslations) {
-        for (StructuredTranslation st : structuredTranslations) {
-            String translationString = st.getTranslationString();
-            float translationScore = st.getTranslationScore();
-            log.debug("translation {} has score {}", translationString, 
translationScore);
-            if (translationScore > minScore) {
-                log.debug("translation score for {} is {}", translationString, 
translationScore);
-                query.add(new 
BooleanClause(qp.createPhraseQuery(FieldNames.FULLTEXT, translationString),
-                        BooleanClause.Occur.SHOULD));
-                log.debug("added query for translated phrase {}", 
translationString);
-                List<String> translationTokens = st.getTranslationTokens();
-                int i = 0;
-                // if output is a phrase, look for tokens having a word alignment to the original sentence terms
-                for (List<Integer> wa : st.getTranslationWordAlignments()) {
-                    if (!wa.isEmpty()) {
-                        String translatedTerm = translationTokens.get(i);
-                        Query termQuery = qp.parse(translatedTerm);
-                        query.add(new BooleanClause(termQuery, 
BooleanClause.Occur.SHOULD));
-                        log.debug("added query for translated token {}", 
translatedTerm);
-                    }
-                    i++;
-                }
-            }
-        }
-    }
-
-    public void clearResources() {
-        decoder.cleanUp();
-    }
-
-    @NotNull
-    @Override
-    public Set<String> getSupportedTypes() {
-        return nodeTypes;
-    }
-}
diff --git a/oak-search-mt/src/main/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProviderFactory.java b/oak-search-mt/src/main/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProviderFactory.java
deleted file mode 100644
index 821f094640..0000000000
--- a/oak-search-mt/src/main/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProviderFactory.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.jackrabbit.oak.plugins.index.mt;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-import org.osgi.service.component.annotations.Activate;
-import org.osgi.service.component.annotations.Component;
-import org.osgi.service.component.annotations.ConfigurationPolicy;
-import org.osgi.service.component.annotations.Deactivate;
-import org.osgi.service.metatype.annotations.AttributeDefinition;
-import org.osgi.service.metatype.annotations.AttributeType;
-import org.osgi.service.metatype.annotations.Designate;
-import org.osgi.service.metatype.annotations.ObjectClassDefinition;
-import org.apache.jackrabbit.oak.commons.PropertiesUtil;
-import 
org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider;
-import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.search.Query;
-import org.jetbrains.annotations.NotNull;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Factory for {@link MTFulltextQueryTermsProvider}
- * @deprecated see OAK-10694
- */
-@Component(
-        service = { FulltextQueryTermsProvider.class },
-        configurationPolicy = ConfigurationPolicy.REQUIRE
-)
-@Designate(
-        ocd = MTFulltextQueryTermsProviderFactory.Configuration.class,
-        factory = true )
-@Deprecated(forRemoval = true)
-public class MTFulltextQueryTermsProviderFactory implements 
FulltextQueryTermsProvider {
-
-    @ObjectClassDefinition(
-            id = 
"org.apache.jackrabbit.oak.plugins.index.mt.MTFulltextQueryTermsProviderFactory",
-            name = "Apache Jackrabbit Oak Machine Translation Fulltext Query Terms Provider"
-    )
-    @interface Configuration {
-
-        @AttributeDefinition(
-                name = "Joshua Config Path",
-                description = "The absolute filesystem path to Apache Joshua configuration file"
-        )
-        String path_to_config();
-
-        @AttributeDefinition(
-                name = "Node types",
-                description = "List of node types for which expanding the query via MT",
-                cardinality = 10
-        )
-        String[] node_types();
-
-        @AttributeDefinition(
-                name = "Minimum score",
-                description = "Minimum allowed score for a translated phrase/term to be used for expansion",
-                type = AttributeType.FLOAT
-        )
-        float min_score() default DEFAULT_MIN_SCORE;
-    }
-
-    private static final float DEFAULT_MIN_SCORE = 0.5f;
-
-    private final Logger log = LoggerFactory.getLogger(getClass());
-
-    private MTFulltextQueryTermsProvider queryTermsProvider;
-
-    @Activate
-    public void activate(Configuration config) {
-        String pathToJoshuaConfig = 
PropertiesUtil.toString(config.path_to_config(), ".");
-        String[] nts = PropertiesUtil.toStringArray(config.node_types(), new 
String[]{"Oak:unstructured"});
-        float minScore = (float) PropertiesUtil.toDouble(config.min_score(), 
DEFAULT_MIN_SCORE);
-        log.info("activating MT FulltextQueryTermProvider from Joshua config at {} on {} nodetypes, minScore {}", pathToJoshuaConfig, nts, minScore);
-        Decoder decoder = null;
-        try {
-            log.debug("reading joshua config");
-            JoshuaConfiguration configuration = new JoshuaConfiguration();
-            configuration.readConfigFile(pathToJoshuaConfig);
-            configuration.setConfigFilePath(new 
File(pathToJoshuaConfig).getCanonicalFile().getParent());
-            configuration.use_structured_output = true;
-            decoder = new Decoder(configuration, pathToJoshuaConfig);
-            log.debug("decoder initialized");
-            Set<String> nodeTypes = new HashSet<>();
-            nodeTypes.addAll(Arrays.asList(nts));
-            queryTermsProvider = new MTFulltextQueryTermsProvider(decoder, 
nodeTypes, minScore);
-        } catch (Exception e) {
-            log.error("could not initialize MTFulltextQueryTermProvider", e);
-            if (decoder != null) {
-                decoder.cleanUp();
-            }
-        }
-    }
-
-    @Deactivate
-    public void deactivate() {
-        if (queryTermsProvider != null) {
-            log.debug("clearing resources");
-            queryTermsProvider.clearResources();
-        }
-    }
-
-    @Override
-    public Query getQueryTerm(String text, Analyzer analyzer, NodeState 
indexDefinition) {
-        if (queryTermsProvider != null) {
-            return queryTermsProvider.getQueryTerm(text, analyzer, 
indexDefinition);
-        } else {
-            return null;
-        }
-    }
-
-    @NotNull
-    @Override
-    public Set<String> getSupportedTypes() {
-        if (queryTermsProvider != null) {
-            return queryTermsProvider.getSupportedTypes();
-        } else {
-            return Collections.emptySet();
-        }
-    }
-}
diff --git a/oak-search-mt/src/test/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProviderTest.java b/oak-search-mt/src/test/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProviderTest.java
deleted file mode 100644
index dd402c6b3f..0000000000
--- a/oak-search-mt/src/test/java/org/apache/jackrabbit/oak/plugins/index/mt/MTFulltextQueryTermsProviderTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.jackrabbit.oak.plugins.index.mt;
-
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.StructuredTranslation;
-import org.apache.joshua.decoder.Translation;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.lucene.analysis.Analyzer;
-import org.junit.Test;
-
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-/**
- * Tests for {@link MTFulltextQueryTermsProvider}
- * @deprecated see OAK-10694
- */
-@Deprecated(forRemoval = true)
-public class MTFulltextQueryTermsProviderTest {
-
-    @Test
-    public void testGetQueryTermWithPhraseTranslation() throws Exception {
-        Decoder decoder = mock(Decoder.class);
-        Translation translation = mock(Translation.class);
-        List<StructuredTranslation> translations = new LinkedList<>();
-        StructuredTranslation structuredTranslation = 
mock(StructuredTranslation.class);
-        when(structuredTranslation.getTranslationString()).thenReturn("fou bur");
-        translations.add(structuredTranslation);
-        when(translation.getStructuredTranslations()).thenReturn(translations);
-        when(decoder.decode(any(Sentence.class))).thenReturn(translation);
-        JoshuaConfiguration configuration = mock(JoshuaConfiguration.class);
-        when(decoder.getJoshuaConfiguration()).thenReturn(configuration);
-        Set<String> nodeTypes = new HashSet<>();
-        MTFulltextQueryTermsProvider mtFulltextQueryTermsProvider = new 
MTFulltextQueryTermsProvider(decoder, nodeTypes, -1);
-        Analyzer analyzer = mock(Analyzer.class);
-        NodeState indexDefinition = mock(NodeState.class);
-        mtFulltextQueryTermsProvider.getQueryTerm("foo bar", analyzer, 
indexDefinition);
-    }
-}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 1b205655c9..0f065a63e3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -63,7 +63,6 @@
     <module>oak-lucene</module>
     <module>oak-solr-core</module>
     <module>oak-solr-osgi</module>
-    <module>oak-search-mt</module>
     <module>oak-auth-external</module>
     <module>oak-auth-ldap</module>
     <module>oak-run-commons</module>

Reply via email to