This is an automated email from the ASF dual-hosted git repository. acosentino pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push: new 7e3c1428614 CAMEL-20739 - Camel-Pinecone: Add a datatype for transforming langchain embeddings in Pinecone objects (#14061) 7e3c1428614 is described below commit 7e3c1428614ca6ae5193dfa017fc08db6902a849 Author: Andrea Cosentino <anco...@gmail.com> AuthorDate: Mon May 6 14:41:27 2024 +0200 CAMEL-20739 - Camel-Pinecone: Add a datatype for transforming langchain embeddings in Pinecone objects (#14061) Signed-off-by: Andrea Cosentino <anco...@gmail.com> --- .../apache/camel/catalog/transformers.properties | 1 + .../catalog/transformers/pinecone-embeddings.json | 14 ++ .../camel-ai/camel-langchain4j-embeddings/pom.xml | 5 + ...Chain4jEmbeddingsComponentPineconeTargetIT.java | 149 +++++++++++++++++++++ .../org/apache/camel/transformer.properties | 7 + .../apache/camel/transformer/pinecone-embeddings | 2 + .../camel/transformer/pinecone-embeddings.json | 14 ++ .../PineconeEmbeddingsDataTypeTransformer.java | 45 +++++++ 8 files changed, 237 insertions(+) diff --git a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties index 3b404011535..4efe6eae08f 100644 --- a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties +++ b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties @@ -27,6 +27,7 @@ google-sheets-stream-application-cloudevents google-storage-application-cloudevents http-application-cloudevents milvus-embeddings +pinecone-embeddings protobuf-binary protobuf-x-java-object protobuf-x-struct diff --git a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers/pinecone-embeddings.json b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers/pinecone-embeddings.json new file mode 100644 index 00000000000..42b2bf34e7b --- /dev/null +++ b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers/pinecone-embeddings.json @@ -0,0 +1,14 @@ +{ + "transformer": { + "kind": "transformer", + "name": "pinecone:embeddings", + "title": "Pinecone (Embeddings)", + "description": "Prepares the message to become an object writable by Pinecone component", + "deprecated": false, + "javaType": "org.apache.camel.component.pinecone.transform.PineconeEmbeddingsDataTypeTransformer", + "groupId": "org.apache.camel", + "artifactId": "camel-pinecone", + "version": "4.6.0-SNAPSHOT" + } +} + diff --git a/components/camel-ai/camel-langchain4j-embeddings/pom.xml b/components/camel-ai/camel-langchain4j-embeddings/pom.xml index 06793560c05..79bb7cf9192 100644 --- a/components/camel-ai/camel-langchain4j-embeddings/pom.xml +++ b/components/camel-ai/camel-langchain4j-embeddings/pom.xml @@ -69,6 +69,11 @@ <artifactId>camel-milvus</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.camel</groupId> + <artifactId>camel-pinecone</artifactId> + <scope>test</scope> + </dependency> <dependency> <groupId>org.apache.camel</groupId> <artifactId>camel-test-junit5</artifactId> diff --git a/components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentPineconeTargetIT.java b/components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentPineconeTargetIT.java new file mode 100644 index 00000000000..9e5b3c51621 --- /dev/null +++ b/components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentPineconeTargetIT.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.component.langchain4j.embeddings; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel; +import io.pinecone.unsigned_indices_model.QueryResponseWithUnsignedIndices; +import org.apache.camel.CamelContext; +import org.apache.camel.Exchange; +import org.apache.camel.RoutesBuilder; +import org.apache.camel.builder.RouteBuilder; +import org.apache.camel.component.pinecone.PineconeVectorDb; +import org.apache.camel.component.pinecone.PineconeVectorDbAction; +import org.apache.camel.spi.DataType; +import org.apache.camel.test.junit5.CamelTestSupport; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.condition.EnabledIfSystemProperties; +import org.junit.jupiter.api.condition.EnabledIfSystemProperty; + +import static org.assertj.core.api.Assertions.assertThat; + +// Must be manually tested. Provide your own accessKey and secretKey using -Dpinecone.token +@EnabledIfSystemProperties({ + @EnabledIfSystemProperty(named = "pinecone.token", matches = ".*", disabledReason = "Pinecone token not provided"), +}) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +public class LangChain4jEmbeddingsComponentPineconeTargetIT extends CamelTestSupport { + public static final long POINT_ID = 8; + public static final String PINECONE_URI = "pinecone:embeddings?token={{pinecone.token}}"; + + @Override + protected CamelContext createCamelContext() throws Exception { + CamelContext context = super.createCamelContext(); + + context.getRegistry().bind("embedding-model", new AllMiniLmL6V2EmbeddingModel()); + + return context; + } + + @Test + @Order(1) + public void createServerlessIndex() { + + Exchange result = fluentTemplate.to(PINECONE_URI) + .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.CREATE_SERVERLESS_INDEX) + .withBody( + "hello") + .withHeader(PineconeVectorDb.Headers.INDEX_NAME, "embeddings") + .withHeader(PineconeVectorDb.Headers.COLLECTION_SIMILARITY_METRIC, "cosine") + .withHeader(PineconeVectorDb.Headers.COLLECTION_DIMENSION, 384) + .withHeader(PineconeVectorDb.Headers.COLLECTION_CLOUD, "aws") + .withHeader(PineconeVectorDb.Headers.COLLECTION_CLOUD_REGION, "us-east-1") + .request(Exchange.class); + + assertThat(result).isNotNull(); + assertThat(result.getException()).isNull(); + } + + @Test + @Order(2) + public void upsert() { + + Exchange result = fluentTemplate.to("direct:in") + .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.UPSERT) + .withBody("hi") + .withHeader(PineconeVectorDb.Headers.INDEX_NAME, "embeddings") + .withHeader(PineconeVectorDb.Headers.INDEX_ID, "elements") + .request(Exchange.class); + + assertThat(result).isNotNull(); + assertThat(result.getException()).isNull(); + } + + @Test + @Order(3) + public void queryByVector() { + + List<Float> elements = generateFloatVector(); + + Exchange result = fluentTemplate.to(PINECONE_URI) + .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.QUERY) + .withBody( + elements) + .withHeader(PineconeVectorDb.Headers.INDEX_NAME, "embeddings") + .withHeader(PineconeVectorDb.Headers.QUERY_TOP_K, 384) + .request(Exchange.class); + + assertThat(result).isNotNull(); + assertThat(result.getException()).isNull(); + assertThat(((QueryResponseWithUnsignedIndices) result.getMessage().getBody()).getMatchesList()).isNotNull(); + } + + @Test + @Order(4) + public void deleteIndex() { + + Exchange result = fluentTemplate.to(PINECONE_URI) + .withHeader(PineconeVectorDb.Headers.ACTION, PineconeVectorDbAction.DELETE_INDEX) + .withBody( + "test") + .withHeader(PineconeVectorDb.Headers.INDEX_NAME, "embeddings") + .request(Exchange.class); + + assertThat(result).isNotNull(); + assertThat(result.getException()).isNull(); + } + + @Override + protected RoutesBuilder createRouteBuilder() { + return new RouteBuilder() { + public void configure() { + from("direct:in") + .to("langchain4j-embeddings:test") + .setHeader(PineconeVectorDb.Headers.ACTION).constant(PineconeVectorDbAction.UPSERT) + .setHeader(PineconeVectorDb.Headers.INDEX_ID).constant(POINT_ID) + .transform( + new DataType("pinecone:embeddings")) + .to(PINECONE_URI); + } + }; + } + + private List<Float> generateFloatVector() { + Random ran = new Random(); + List<Float> vector = new ArrayList<>(); + for (int i = 0; i < 384; ++i) { + vector.add(ran.nextFloat()); + } + return vector; + } +} diff --git a/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties new file mode 100644 index 00000000000..a89223e117e --- /dev/null +++ b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties @@ -0,0 +1,7 @@ +# Generated by camel build tools - do NOT edit this file! +transformers=pinecone:embeddings +groupId=org.apache.camel +artifactId=camel-pinecone +version=4.6.0-SNAPSHOT +projectName=Camel :: Pinecone +projectDescription=Camel Pinecone support diff --git a/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings new file mode 100644 index 00000000000..026f84b6c31 --- /dev/null +++ b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings @@ -0,0 +1,2 @@ +# Generated by camel build tools - do NOT edit this file! +class=org.apache.camel.component.pinecone.transform.PineconeEmbeddingsDataTypeTransformer diff --git a/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings.json b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings.json new file mode 100644 index 00000000000..42b2bf34e7b --- /dev/null +++ b/components/camel-pinecone/src/generated/resources/META-INF/services/org/apache/camel/transformer/pinecone-embeddings.json @@ -0,0 +1,14 @@ +{ + "transformer": { + "kind": "transformer", + "name": "pinecone:embeddings", + "title": "Pinecone (Embeddings)", + "description": "Prepares the message to become an object writable by Pinecone component", + "deprecated": false, + "javaType": "org.apache.camel.component.pinecone.transform.PineconeEmbeddingsDataTypeTransformer", + "groupId": "org.apache.camel", + "artifactId": "camel-pinecone", + "version": "4.6.0-SNAPSHOT" + } +} + diff --git a/components/camel-pinecone/src/main/java/org/apache/camel/component/pinecone/transform/PineconeEmbeddingsDataTypeTransformer.java b/components/camel-pinecone/src/main/java/org/apache/camel/component/pinecone/transform/PineconeEmbeddingsDataTypeTransformer.java new file mode 100644 index 00000000000..c3d4329bbbe --- /dev/null +++ b/components/camel-pinecone/src/main/java/org/apache/camel/component/pinecone/transform/PineconeEmbeddingsDataTypeTransformer.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.camel.component.pinecone.transform; + +import java.util.UUID; + +import dev.langchain4j.data.embedding.Embedding; +import org.apache.camel.Message; +import org.apache.camel.component.pinecone.PineconeVectorDb; +import org.apache.camel.spi.DataType; +import org.apache.camel.spi.DataTypeTransformer; +import org.apache.camel.spi.Transformer; + +/** + * Maps a LangChain4j Embeddings to a Pinecone InsertParam/Upsert Param to write an embeddings vector on a Pinecone + * Database. + */ +@DataTypeTransformer(name = "pinecone:embeddings", + description = "Prepares the message to become an object writable by Pinecone component") +public class PineconeEmbeddingsDataTypeTransformer extends Transformer { + + @Override + public void transform(Message message, DataType fromType, DataType toType) { + Embedding embedding = message.getHeader("CamelLangChain4jEmbeddingsVector", Embedding.class); + + message.setHeader(PineconeVectorDb.Headers.INDEX_NAME, "embeddings"); + message.setHeader(PineconeVectorDb.Headers.INDEX_ID, UUID.randomUUID()); + message.setBody(embedding.vectorAsList()); + } +}