sunank200 commented on code in PR #34891:
URL: https://github.com/apache/airflow/pull/34891#discussion_r1357077212


##########
airflow/providers/postgres/hooks/postgres.py:
##########
@@ -320,6 +320,29 @@ def _generate_insert_sql(
 
         return sql
 
+    def ingest_embedding(
+        self, table: str, input_data: list[str], embeddings: list[float], vector_size: int
+    ) -> None:
+        """
+        Store embedding vector in Postgres table.
+
+        :param table: The Name of the table
+        :param input_data: The source data from which the embedding has been created
+        :param embeddings: The embedding vector response from LLM service
+        :param vector_size: The size of vector. The maximum dimensions can be 2,000
+        """
+        from pgvector.psycopg import register_vector
+
+        self.conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
+        register_vector(self.conn)
+        self.conn.execute(
+            "CREATE TABLE IF NOT EXISTS %s (id bigserial PRIMARY KEY, content text, embedding vector(%s))",
+            (table, vector_size),
+        )
+
+        for content, embedding in zip(input_data, embeddings):
+            self.execute("INSERT INTO %s (content, embedding) VALUES (%s, %s)", (table, content, embedding))

Review Comment:
   
https://github.com/apache/airflow/pull/34891/commits/90dfdba912eaf82795c234a0bae6db2f1e0ac175



##########
airflow/providers/postgres/operators/postgres.py:
##########
@@ -82,3 +85,42 @@ def __init__(
             AirflowProviderDeprecationWarning,
             stacklevel=2,
         )
+
+
+class PgVectorIngestOperator(BaseOperator):

Review Comment:
   
https://github.com/apache/airflow/pull/34891/commits/90dfdba912eaf82795c234a0bae6db2f1e0ac175



##########
airflow/providers/postgres/provider.yaml:
##########
@@ -53,6 +53,7 @@ dependencies:
   - apache-airflow>=2.4.0
   - apache-airflow-providers-common-sql>=1.3.1
   - psycopg2-binary>=2.8.0
+  - pgvector

Review Comment:
   
https://github.com/apache/airflow/pull/34891/commits/90dfdba912eaf82795c234a0bae6db2f1e0ac175



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to