This is an automated email from the ASF dual-hosted git repository.

damccorm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d7b0bcde9e Rag opensearch usecase with Beam's MLTransform (#32018)
1d7b0bcde9e is described below

commit 1d7b0bcde9e5f6954f00845a93827d89532ac2c3
Author: Ayush Pandey <32012449+itsayushpan...@users.noreply.github.com>
AuthorDate: Thu Oct 3 09:10:12 2024 -0400

    Rag opensearch usecase with Beam's MLTransform (#32018)
    
    * Adding insertion and enrichment pipeline
    
    * Enhanced Data Schema
    
    * Added Apache Licensed to the notebook
    
    * Adding Chunking Strategy
    
    * removed unused imports
    
    * Modified insertion logic in redis for incorporating chunking strategy
    
    * refacted redis code
    
    * code review changes
    
    * Added chunking code in notebook
    
    * Added code review changes
    
    * Code review changes: using chunking strategy as enum
    
    * Added Code Review Changes
    
    * Code review changes
    
    * Added code review changes
    
    * Added Code Review Changes
    
    * Code review changes
    
    * Ingestion and Enrichment pipeline for OpenSearch Vector DB
    
    * Added logic for reading password from .env file
    
    * Added opensearch vector notebook
    
    * Update credentials.env
    
    * Added code review changes
    
    * Added Description in opensearch notebook
    
    * Added description in opensearch notebook
    
    * Code review changes
---
 .../beam-ml/rag_usecase/opensearch_connector.py    |  372 +++++
 .../beam-ml/rag_usecase/opensearch_enrichment.py   |  134 ++
 .../rag_usecase/opensearch_rag_pipeline.ipynb      | 1719 ++++++++++++++++++++
 3 files changed, 2225 insertions(+)

diff --git a/examples/notebooks/beam-ml/rag_usecase/opensearch_connector.py 
b/examples/notebooks/beam-ml/rag_usecase/opensearch_connector.py
new file mode 100644
index 00000000000..fc83c8d443c
--- /dev/null
+++ b/examples/notebooks/beam-ml/rag_usecase/opensearch_connector.py
@@ -0,0 +1,372 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
+
+import apache_beam as beam
+
+from apache_beam.transforms import DoFn
+from apache_beam.transforms import PTransform
+from apache_beam.transforms import Reshuffle
+
+from typing import Optional
+from opensearchpy import OpenSearch
+
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Set the logging level to reduce verbose information
+import logging
+
+logging.root.setLevel(logging.INFO)
+logger = logging.getLogger(__name__)
+
+__all__ = ['InsertDocInOpenSearch', 'InsertEmbeddingInOpenSearch']
+
+"""This module implements IO classes to write documents to OpenSearch.
+
+
+Insert Doc in OpenSearch:
+-----------------
+:class:`InsertDocInOpenSearch` is a ``PTransform`` that writes documents to a
+configured OpenSearch index; the write is conducted through OpenSearch bulk
+requests.
+
+Each input element is expected to be a mapping with the keys ``id``, ``url``,
+``title``, ``text`` and ``section_id``.
+
+Example usage::
+
+  pipeline | InsertDocInOpenSearch(host='localhost',
+                          port=9200,
+                          username='admin',
+                          password='admin',
+                          batch_size=100)
+
+
+No backward compatibility guarantees. Everything in this module is experimental.
+"""
+
+
+class InsertDocInOpenSearch(PTransform):
+    """InsertDocInOpensearch is a ``PTransform`` that writes a ``PCollection`` 
of
+    key, value tuple or 2-element array into a Opensearch server.
+    """
+
+    def __init__(self,
+                 host: str,
+                 port: int,
+                 username: Optional[str],
+                 password: Optional[str],
+                 batch_size: int = 100
+                 ):
+        """
+        Args:
+        host (str): The opensearch host
+        port (int): The opensearch port
+        username (str): username of OpenSearch DB
+        password (str): password of OpenSearch DB
+        batch_size(int): Number of key, values pairs to write at once
+
+        Returns:
+        :class:`~apache_beam.transforms.ptransform.PTransform`
+        """
+        self.host = host
+        self.port = port
+        self.username = username | os.getenv("OPENSEARCH_USERNAME")
+        self.password = password | os.getenv("OPENSEARCH_PASSWORD")
+        self._batch_size = batch_size
+
+        if not self.username or not self.password:
+            raise ValueError("Username and password are needed for connecting 
to Opensearch cluster.")
+
+    def expand(self, pcoll):
+        return pcoll \
+               | "Reshuffle for Opensearch Insert" >> Reshuffle() \
+               | "Insert document into Opensearch" >> 
beam.ParDo(_InsertDocOpenSearchFn(self.host,
+                                                                               
         self.port,
+                                                                               
         self.username,
+                                                                               
         self.password,
+                                                                               
         self._batch_size)
+                                                                 )
+
+
+class _InsertDocOpenSearchFn(DoFn):
+    """Abstract class that takes in Opensearch
+    credentials to connect to Opensearch DB
+    """
+
+    def __init__(self,
+                 host: str,
+                 port: int,
+                 username: str,
+                 password: str,
+                 batch_size: int = 100
+                 ):
+        self.host = host
+        self.port = port
+        self.username = username
+        self.password = password
+        self.batch_size = batch_size
+
+        self.batch_counter = 0
+        self.batch = list()
+
+        self.text_col = None
+
+    def finish_bundle(self):
+        self._flush()
+
+    def process(self, element, *args, **kwargs):
+        self.batch.append(element)
+        self.batch_counter += 1
+        if self.batch_counter >= self.batch_size:
+            self._flush()
+        yield element
+
+    def _flush(self):
+        if self.batch_counter == 0:
+            return
+
+        with _InsertDocOpenSearchSink(self.host, self.port, self.username, 
self.password) as sink:
+            sink.write(self.batch)
+            self.batch_counter = 0
+            self.batch = list()
+
+
+class _InsertDocOpenSearchSink(object):
+    """Class where we create Opensearch client
+    and write insertion logic in Opensearch
+    """
+
+    def __init__(self,
+                 host: str,
+                 port: int,
+                 username: str,
+                 password: str
+                 ):
+        self.host = host
+        self.port = port
+        self.username = username
+        self.password = password
+        self.client = None
+
+    def _create_client(self):
+        if self.client is None:
+            http_auth = [self.username, self.password]
+            self.client = OpenSearch(hosts=[f'{self.host}:{self.port}'],
+                                     http_auth=http_auth,
+                                     verify_certs=False)
+
+    def write(self, elements):
+        self._create_client()
+        documents = []
+        logger.info(f'Adding Docs in DB: {len(elements)}')
+        for element in elements:
+            documents.extend([{
+                "index": {
+                    "_index": "embeddings-index",
+                    "_id": str(element["id"]),
+                }
+            }, {
+                "url": element["url"],
+                "title": element["title"],
+                "text": element["text"],
+                "section_id": element["section_id"]
+            }])
+
+        self.client.bulk(body=documents, refresh=True)
+
+    def __enter__(self):
+        self._create_client()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.client is not None:
+            self.client.close()
+
+"""This module implements IO classes to write text embeddings to OpenSearch.
+Insert Embedding in OpenSearch:
+-----------------
+:class:`InsertEmbeddingInOpenSearch` is a ``PTransform`` that writes embeddings
+to a configured OpenSearch index; the write is conducted through OpenSearch
+bulk update requests.
+
+Each input element is expected to be a mapping with the keys ``id``, ``url``
+and ``section_id`` plus one entry per embedded column; the value of column
+``<name>`` is stored in the field ``<name>_vector``.
+
+Example usage::
+
+  pipeline | InsertEmbeddingInOpenSearch(host='localhost',
+                          port=9200,
+                          username='admin',
+                          password='admin',
+                          batch_size=100,
+                          embedded_columns=['title', 'text'])
+
+
+No backward compatibility guarantees. Everything in this module is experimental.
+"""
+
+
+class InsertEmbeddingInOpenSearch(PTransform):
+    """InsertEmbeddingInOpenSearch is a ``PTransform`` that writes a 
``PCollection`` of
+    key, value tuple or 2-element array into a Opensearch server.
+    """
+
+    def __init__(self,
+                 host: str,
+                 port: int,
+                 username: Optional[str],
+                 password: Optional[str],
+                 batch_size: int = 100,
+                 embedded_columns: list = []
+                 ):
+        """
+        Args:
+        host (str): The Opensearch host
+        port (int): The Opensearch port
+        username (str): username of OpenSearch DB
+        password (str): password of OpenSearch DB
+        batch_size(int): Number of key, values pairs to write at once
+        embedded_columns (list): list of column whose embedding needs to be 
generated
+
+        Returns:
+        :class:`~apache_beam.transforms.ptransform.PTransform`
+        """
+        self.host = host
+        self.port = port
+        self.username = username | os.getenv("OPENSEARCH_USERNAME")
+        self.password = password | os.getenv("OPENSEARCH_PASSWORD")
+        self.batch_size = batch_size
+        self.embedded_columns = embedded_columns
+
+        if not self.username or not self.password:
+            raise ValueError("Username and password are needed for connecting 
to Opensearch cluster.")
+
+    def expand(self, pcoll):
+        return pcoll \
+               | "Reshuffle for Embedding in Opensearch Insert" >> Reshuffle() 
\
+               | "Write `Embeddings` to Opensearch" >> 
beam.ParDo(_WriteEmbeddingInOpenSearchFn(self.host,
+                                                                               
                 self.port,
+                                                                               
                 self.username,
+                                                                               
                 self.password,
+                                                                               
                 self.batch_size,
+                                                                               
                 self.embedded_columns))
+
+
+class _WriteEmbeddingInOpenSearchFn(DoFn):
+    """Abstract class that takes in Opensearch  credentials
+    to connect to Opensearch DB
+    """
+
+    def __init__(self,
+                 host: str,
+                 port: int,
+                 username: str,
+                 password: str,
+                 batch_size: int = 100,
+                 embedded_columns: list = []):
+        self.host = host
+        self.port = port
+        self.username = username
+        self.password = password
+        self.batch_size = batch_size
+        self.embedded_columns = embedded_columns
+
+        self.batch_counter = 0
+        self.batch = list()
+
+    def finish_bundle(self):
+        self._flush()
+
+    def process(self, element, *args, **kwargs):
+        self.batch.append(element)
+        self.batch_counter += 1
+        if self.batch_counter >= self.batch_size:
+            self._flush()
+
+    def _flush(self):
+        if self.batch_counter == 0:
+            return
+
+        with _InsertEmbeddingInOpenSearchSink(self.host, self.port, 
self.username, self.password,
+                                              self.embedded_columns) as sink:
+            sink.write(self.batch)
+
+            self.batch_counter = 0
+            self.batch = list()
+
+
+class _InsertEmbeddingInOpenSearchSink(object):
+    """Class where we create Opensearch client
+    and write text embedding  in Opensearch DB
+    """
+
+    def __init__(self, host: str,
+                 port: int,
+                 username: str,
+                 password: str,
+                 embedded_columns: list = []):
+        self.host = host
+        self.port = port
+        self.username = username
+        self.password = password
+        self.embedded_columns = embedded_columns
+        self.client = None
+
+    def _create_client(self):
+        if self.client is None:
+            http_auth = [self.username, self.password]
+            self.client = OpenSearch(hosts=[f'{self.host}:{self.port}'],
+                                     http_auth=http_auth,
+                                     verify_certs=False
+                                     )
+
+    def write(self, elements):
+        self._create_client()
+        documents = []
+        logger.info(f'Insert Embeddings in opensearch DB, 
count={len(elements)}')
+        for element in elements:
+            doc_update = {
+                "url": element["url"],
+                "section_id": element["section_id"]
+            }
+
+            for k, v in element.items():
+                if k in self.embedded_columns:
+                    doc_update[f"{k}_vector"] = v
+
+            documents.extend([{
+                "update": {
+                    "_index": "embeddings-index",
+                    "_id": str(element["id"]),
+                }
+            }, {
+                "doc": doc_update
+            }])
+        response = self.client.bulk(documents)
+        if response.get('errors'):
+            for item in response['items']:
+                if 'error' in item['update']:
+                    logger.error(f"Failed to update document ID 
{item['update']['_id']}: {item['update']['error']}")
+        logger.info(f'Insert Embeddings done')
+
+    def __enter__(self):
+        self._create_client()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.client is not None:
+            self.client.close()
\ No newline at end of file
diff --git a/examples/notebooks/beam-ml/rag_usecase/opensearch_enrichment.py 
b/examples/notebooks/beam-ml/rag_usecase/opensearch_enrichment.py
new file mode 100644
index 00000000000..70397550241
--- /dev/null
+++ b/examples/notebooks/beam-ml/rag_usecase/opensearch_enrichment.py
@@ -0,0 +1,134 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This module implements enrichment classes to perform semantic search on an
+OpenSearch vector DB.
+
+
+OpenSearch: Enrichment Handler
+-----------------
+:class:`OpenSearchEnrichmentHandler` is an ``EnrichmentSourceHandler`` that
+performs enrichment/search by fetching the text most similar to the user
+query/prompt from the knowledge base (OpenSearch vector DB) and returns the
+search results as a ``beam.Row`` object.
+
+Example usage::
+  opensearch_handler = OpenSearchEnrichmentHandler(
+      opensearch_host='127.0.0.1', opensearch_port=9200,
+      username='admin', password='admin')
+
+  pipeline | Enrichment(opensearch_handler)
+
+No backward compatibility guarantees. Everything in this module is experimental.
+"""
+
+import logging
+
+
+from opensearchpy import OpenSearch
+from typing import Optional
+import os
+
+import apache_beam as beam
+from apache_beam.transforms.enrichment import EnrichmentSourceHandler
+from apache_beam.transforms.enrichment_handlers.utils import ExceptionLevel
+
+__all__ = [
+    'OpenSearchEnrichmentHandler',
+]
+
+# Set the logging level to reduce verbose information
+import logging
+
+logging.root.setLevel(logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class OpenSearchEnrichmentHandler(EnrichmentSourceHandler[beam.Row, beam.Row]):
+    """A handler for :class:`apache_beam.transforms.enrichment.Enrichment`
+    transform to interact with opensearch vector DB.
+    """
+    def __init__(
+            self,
+            opensearch_host: str,
+            opensearch_port: int,
+            username: Optional[str],
+            password: Optional[str],
+            index_name: str = "embeddings-index",
+            vector_field: str = "text_vector",
+            k: int = 1,
+            size: int = 5,
+    ):
+        """Args:
+          opensearch_host (str): opensearch Host to connect to opensearch DB
+          opensearch_port (int): opensearch Port to connect to opensearch DB
+          index_name (str): Index Name created for searching in opensearch DB
+          vector_field (str): vector field to compute similarity score in 
vector DB
+          k (int): Value of K in KNN algorithm for searching in opensearch
+        """
+        self.opensearch_host = opensearch_host
+        self.opensearch_port = opensearch_port
+        self.username = username | os.getenv("OPENSEARCH_USERNAME")
+        self.password = password | os.getenv("OPENSEARCH_PASSWORD")
+        self.index_name = index_name
+        self.vector_field = vector_field
+        self.k = k
+        self.size = size
+        self.client = None
+
+        if not self.username or not self.password:
+            raise ValueError("Username and password are needed for connecting 
to Opensearch cluster.")
+
+    def __enter__(self):
+        """connect to the opensearch DB using opensearch client.
+        """
+
+        if self.client is None:
+            http_auth = [self.username, self.password]
+            self.client = 
OpenSearch(hosts=[f'{self.opensearch_host}:{self.opensearch_port}'],
+                                     http_auth=http_auth,
+                                     verify_certs=False)
+
+    def __call__(self, request: beam.Row, *args, **kwargs):
+        """
+        Reads a row from the opensearch Vector DB and returns
+        a `Tuple` of request and response.
+
+        Args:
+        request: the input `beam.Row` to enrich.
+        """
+
+        # read embedding vector for user query
+
+        embedded_query = request['text']
+
+        # Prepare the Query
+        query = {
+            'size': self.size,
+            'query': {
+                'knn': {
+                    self.vector_field: {
+                        "vector": embedded_query,
+                        "k": self.k
+                    }
+                }
+            }
+        }
+
+        # perform vector search
+        results = self.client.search(
+            body=query,
+            index=self.index_name
+        )
+        logger.info("Enrichment_results", results)
+
+        return beam.Row(text=embedded_query), beam.Row(docs=results)
diff --git 
a/examples/notebooks/beam-ml/rag_usecase/opensearch_rag_pipeline.ipynb 
b/examples/notebooks/beam-ml/rag_usecase/opensearch_rag_pipeline.ipynb
new file mode 100644
index 00000000000..cc31ff678fe
--- /dev/null
+++ b/examples/notebooks/beam-ml/rag_usecase/opensearch_rag_pipeline.ipynb
@@ -0,0 +1,1719 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "aab0b3c8-f952-4db8-8026-74d4d0d18f2c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# @title ###### Licensed to the Apache Software Foundation (ASF), Version 
2.0 (the \"License\")\n",
+    "\n",
+    "# Licensed to the Apache Software Foundation (ASF) under one\n",
+    "# or more contributor license agreements. See the NOTICE file\n",
+    "# distributed with this work for additional information\n",
+    "# regarding copyright ownership. The ASF licenses this file\n",
+    "# to you under the Apache License, Version 2.0 (the\n",
+    "# \"License\"); you may not use this file except in compliance\n",
+    "# with the License. You may obtain a copy of the License at\n",
+    "#\n",
+    "#   http://www.apache.org/licenses/LICENSE-2.0\n",
+    "#\n",
+    "# Unless required by applicable law or agreed to in writing,\n",
+    "# software distributed under the License is distributed on an\n",
+    "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+    "# KIND, either express or implied. See the License for the\n",
+    "# specific language governing permissions and limitations\n",
+    "# under the License"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d97c2e7e-3fcb-4f76-8bb2-5fb3cb8f98b2",
+   "metadata": {},
+   "source": [
+    "# RAG Demo on Opensearch \n",
+    "This notebook shows how to use the OpenSearch vector database as a knowledge base for building a RAG-based AI application. It provides custom Python IO classes for the ingestion and enrichment pipelines that write to and search the OpenSearch vector DB. Follow the step-by-step instructions to learn how to write your own custom IO classes for the OpenSearch vector database."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a5097361-e9ca-4217-8b41-55b11178fabf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#installing dependencies\n",
+    "!pip install pandas==1.4.4\n",
+    "!pip install numpy==1.24.4\n",
+    "!pip install apache_beam==2.56.0\n",
+    "!pip install opensearch==2.1.0\n",
+    "#used for chunking\n",
+    "!pip install langchain==0.1.14 "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "697ad0fa-7625-4a27-b1b7-72eda94748a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import apache_beam as beam\n",
+    "import pandas as pd\n",
+    "from apache_beam.ml.transforms.base import MLTransform\n",
+    "from apache_beam.transforms.enrichment import Enrichment\n",
+    "from apache_beam.ml.transforms.embeddings.huggingface import 
SentenceTransformerEmbeddings\n",
+    "import tempfile\n",
+    "import json\n",
+    "\n",
+    "from opensearchpy import OpenSearch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1bc50a4f-9738-4b35-a9ff-af5b76fca0f3",
+   "metadata": {},
+   "source": [
+    "The imports below are Python files that live in the same folder as this notebook. They are not part of any Beam module."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "df5e71bd-2be8-4477-b06a-028a4ff4a5c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from opensearch_connector import *\n",
+    "from opensearch_enrichment import *\n",
+    "from chunks_generation import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "3d274890-4e6b-4a3d-b682-9fc6e21e5cca",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'2.56.0'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#To check beam version installed \n",
+    "beam.__version__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "791708ac-1bae-47a2-9c28-ada31d8846b9",
+   "metadata": {},
+   "source": [
+    "Usually users already have a credentials file set up. For completeness, we generate one here; it is loaded in a later step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c2a4451f-7c23-42e5-8874-db8a21c8ee83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cell_str='''OPENSEARCH_USERNAME=\"<REPLACE_WITH_YOUR_USERNAME>\"\n",
+    "OPENSEARCH_PASSWORD=\"<REPLACE_WITH_YOUR_PASSWORD>\"\n",
+    "'''\n",
+    "\n",
+    "with open('credentials.env', 'w') as f:\n",
+    "  f.write(cell_str)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "84d2e424-2747-421b-8ab0-8416992536b6",
+   "metadata": {},
+   "source": [
+    "Next, we load the OpenSearch credentials needed in the following steps. They are read from the `credentials.env` file into the environment, so replace the placeholder values in that file before running this cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb1d1cab-da15-48f3-9353-33d8080efd1d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv('credentials.env')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "394e2ebf-9553-4464-b6a3-1386ef57f24b",
+   "metadata": {},
+   "source": [
+    "# Reading json data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "006fdc19-e16a-4142-9653-b0c0a7e63b8e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('hf_small_wikipedia.json', 'r') as j:\n",
+    "     contents = json.loads(j.read())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "213fb96c-a641-4688-b083-96fd762608aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "list"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(contents)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bdc5047c-8539-4381-84fd-5660f9829a10",
+   "metadata": {},
+   "source": [
+    "# Creating a Search Index\n",
+    "### Specify and create a search index in OpenSearch vector DB. \n",
+    "\n",
+    "1. Set some constants for defining our index like the distance metric and the index name.\n",
+    "\n",
+    "2. Define the index schema with OpenSearch fields.\n",
+    "\n",
+    "3. Create the index.\n",
+    "\n",
+    "4. Index creation is needed only once."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "c32bad0c-c976-46a7-b43c-3887fc964b51",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: InsecureRequestWarning: Unverified HTTPS request is being made to host 'localhost'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:PUT https://localhost:9200/embeddings-index [status:200 
request:0.351s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "http_auth = [os.environ['OPENSEARCH_USERNAME'], 
os.environ['OPENSEARCH_PASSWORD']]\n",
+    "client = OpenSearch(hosts = ['https://localhost:9200'],\n",
+    "                    http_auth = http_auth,\n",
+    "                    verify_certs = False\n",
+    "                    )\n",
+    "\n",
+    "index_name = 'embeddings-index'\n",
+    "\n",
+    "index_body = {\n",
+    "  \"settings\": {\n",
+    "      \"index.knn\": True\n",
+    "   },\n",
+    "   \"mappings\": {\n",
+    "      \"properties\": {\n",
+    "         \"title_vector\": {\n",
+    "            \"type\": \"knn_vector\",\n",
+    "            \"dimension\": 384,\n",
+    "            \"method\": {\n",
+    "               \"engine\": \"faiss\",\n",
+    "               \"name\": \"hnsw\"\n",
+    "            }\n",
+    "         },\n",
+    "         \"text_vector\": {\n",
+    "            \"type\": \"knn_vector\",\n",
+    "            \"dimension\": 384,\n",
+    "            \"method\": {\n",
+    "               \"engine\": \"faiss\",\n",
+    "               \"name\": \"hnsw\"\n",
+    "            }\n",
+    "         },\n",
+    "         \"id\": {\n",
+    "            \"type\": \"long\"\n",
+    "         },\n",
+    "         \"url\": {\n",
+    "            \"type\": \"text\"\n",
+    "         },\n",
+    "         \"title\": {\n",
+    "            \"type\": \"text\"\n",
+    "         },\n",
+    "         \"text\": {\n",
+    "            \"type\": \"text\"\n",
+    "         },\n",
+    "         \"section_id\": {\n",
+    "            \"type\": \"long\"\n",
+    "         }\n",
+    "    }\n",
+    "}}\n",
+    "\n",
+    "response = client.indices.create(index_name, body=index_body)\n",
+    "\n",
+    "# Command for deleting an index\n",
+    "# response = client.indices.delete(index = index_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5ebc23b0-f3e6-484e-bf64-a07c97489ccf",
+   "metadata": {},
+   "source": [
+    "# Creating Knowledge Base in OpenSearch Vector Database\n",
+    "After creating a search index, we can load documents into it. We will use 
the same documents we used in the previous cell. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e4a9d492-e826-4101-a21b-49b88933dce1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Missing pipeline option (runner). Executing pipeline using 
the default runner: DirectRunner.\n",
+      
"WARNING:apache_beam.runners.interactive.interactive_environment:Dependencies 
required for Interactive Beam PCollection visualization are not available, 
please use: `pip install apache-beam[interactive]` to install necessary 
dependencies to enable all data visualization features.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "        if (typeof window.interactive_beam_jquery == 'undefined') {\n",
+       "          var jqueryScript = document.createElement('script');\n",
+       "          jqueryScript.src = 
'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n",
+       "          jqueryScript.type = 'text/javascript';\n",
+       "          jqueryScript.onload = function() {\n",
+       "            var datatableScript = document.createElement('script');\n",
+       "            datatableScript.src = 
'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n",
+       "            datatableScript.type = 'text/javascript';\n",
+       "            datatableScript.onload = function() {\n",
+       "              window.interactive_beam_jquery = 
jQuery.noConflict(true);\n",
+       "              
window.interactive_beam_jquery(document).ready(function($){\n",
+       "                \n",
+       "              });\n",
+       "            }\n",
+       "            document.head.appendChild(datatableScript);\n",
+       "          };\n",
+       "          document.head.appendChild(jqueryScript);\n",
+       "        } else {\n",
+       "          
window.interactive_beam_jquery(document).ready(function($){\n",
+       "            \n",
+       "          });\n",
+       "        }"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "INFO:apache_beam.runners.worker.statecache:Creating state cache with 
size 104857600\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "2024-08-17 12:28:49.961311: I 
tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is 
optimized to use available CPU instructions in performance-critical 
operations.\n",
+      "To enable the following instructions: AVX2 FMA, in other operations, 
rebuild TensorFlow with the appropriate compiler flags.\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "071b8c60203342958f3807c3f5a1b6b8",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "63104f55e0c94720a5c5485601dbff16",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9b4a4e31c51142babe8d3bbf2c230abf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f5cea3eba89c44988808d98e4e729c7d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fb9a2d6f213644968a15da340a34c4ef",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6315ad37d3974517bebca0935cf6e99c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7e4c37bbb6754614903fa78160f1c475",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5eda5bfc904b4463889211a7af6273cc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "43c4f83cbb3045d59db55800af517673",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e3147202ddbc48ba910a9efd52668eb0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "41b8cf27a55648cb8b99b9325d3ea904",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3554e99fceb1490caf785ecd641c3a2b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      
"/usr/local/lib/python3.10/site-packages/opensearchpy/connection/http_urllib3.py:208:
 UserWarning: Connecting to https://localhost:9200 using SSL with 
verify_certs=False is insecure.\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:1.352s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4f1ef21e4566483a9795b73de3cb8954",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "712a6cdbb623459a954288b18f6cc21c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7c78041898154af4a2ade47439b78b72",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1a67fac2e04648198f05ea6462394ae5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c731d48bad8348e785ce9ff2c39a90fb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2e55dabb036449df8210eb8fae908eb7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "45dc25d5f33c4aff9a56773f88d7a872",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3620aaefbfcc477ba95301ab9c9f2cc6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.071s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ff6243888e4b429cb33f197935287e55",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7beaef6725934e3fb735d42ab027325e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.070s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6ad77843d0ee41f79d96a1a7b1fd50d5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "96a5145812714e0aa007a1ba9b5454df",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.065s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8d33894cc025455b89b0ac550c937aa9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "28af11f6588d413f922f13d0f4a7922b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.073s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5934ed15f3214ff1ad3604006ad8dcfd",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "67fe0f006fb740feb2b310e9a1b7e8c2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.059s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ffd44e5ad2dc4a18a74a7e91ed10f2a0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fba1f0d355454ffcbb106cca28b2c7db",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.088s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.091s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "62bbea2e1a934ccba6cd2257db413db0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b5bf6d8012314844b3949f575055cf25",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.098s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.078s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6f19a4a15ba044bfb9e00fbc78bc4fcc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2966d8a3c63a466a8de4d8e7d8b9b338",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.103s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.079s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.084s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0357acea5ca74775ad179d53a59f924d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4df896cbf002406a9522ca2b47a52677",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.093s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.075s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d77111294efa4617a644f606dc878394",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "14f1b3d760f94ba6b6c1cd675a4858e0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.076s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.079s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "117aeddb17854fe98feb0bcafa07a4fa",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4ae68c8860a440ac81c653e6f2d99ad2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.165s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.089s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.088s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "103621eecd2740f4b93b059294ed4149",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2842a864985b4122ac3efe874508b41f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.077s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.060s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.067s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3b64dffdbc7f4a5eac1f59294757b42e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "270a52af4b2049289e37a35c959cffdd",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.067s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.082s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.054s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b3e7e77046ee47779a3d13bf4eb4ce95",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6fd5f7a817a946c3984d2654f4b3890a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.063s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.057s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "32caa99b303445fba750bdeb74a7f7ef",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "67513ca1ee544b009b4b4a6dabcaca90",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.066s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.057s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.070s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.086s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "38e6a79e798c4a5f853f1c1e305890fa",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f7a816ff0a374558aabac8b005cd90d0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.064s]\n",
+      "INFO:opensearch_connector:Adding Docs in DB: 10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.083s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "da1a46a3cb6b4543aad1597240194ad6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6218c7f94a104641846dcef9b01a6075",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:opensearch_connector:Adding Docs in DB: 9\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk?refresh=true 
[status:200 request:0.066s]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b531195780c441f286906eb685437811",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0f97a99954354e9eb19c340a85508d1c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:BatchElements statistics: element_count=349 batch_count=27 
next_batch_size=4 timings=[(1, 0.18766427040100098), (2, 0.5434770584106445), 
(1, 0.18743109703063965), (2, 0.28671717643737793), (1, 0.180436372756958), (2, 
0.2946739196777344), (1, 0.23954296112060547), (2, 0.2287900447845459), (4, 
0.6388511657714844), (6, 0.7156858444213867), (7, 0.8332958221435547), (13, 
1.1533119678497314), (12, 1.2309019565582275), (14, 1.1411330699920654), (19, 
1.4424619674682617), (19, 1. [...]
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.075s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.087s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.084s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.083s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.076s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.071s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.067s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.074s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.082s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.085s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.071s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.082s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.071s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.067s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.068s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.065s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.068s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.076s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.069s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.062s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.079s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.071s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.065s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.089s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.088s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.072s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.083s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.074s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.066s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.087s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.091s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.096s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.111s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=10\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n";,
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.149s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n",
+      "INFO:opensearch_connector:Insert Embeddings in opensearch DB, 
count=9\n",
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/_bulk [status:200 
request:0.155s]\n",
+      "INFO:opensearch_connector:Insert Embeddings done\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Insertion Pipeline\n",
+    "import os\n",
+    "\n",
+    "artifact_location = tempfile.mkdtemp()\n",
+    "generate_embedding_fn = 
SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2',\n",
+    "                                                               
columns=['title','text'])\n",
+    "with beam.Pipeline() as p:\n",
+    "    embeddings = (\n",
+    "        p  \n",
+    "        | \"Read data\" >> beam.Create(contents)\n",
+    "        | \"Generate text chunks\" >> ChunksGeneration(chunk_size = 500, 
chunk_overlap = 0, chunking_strategy = ChunkingStrategy.SPLIT_BY_TOKENS)\n",
+    "        | \"Insert document in openSearch\" >> 
InsertDocInOpenSearch(host='https://localhost', 
username=os.environ['OPENSEARCH_USERNAME'], 
password=os.environ['OPENSEARCH_PASSWORD'], port=9200, batch_size = 10)\n",
+    "        | \"Generate Embeddings\" >> 
MLTransform(write_artifact_location=artifact_location).with_transform(generate_embedding_fn)
 \n",
+    "        | \"Insert Embedding in openSearch\" >> 
InsertEmbeddingInOpenSearch(host='https://localhost', 
username=os.environ['OPENSEARCH_USERNAME'], 
password=os.environ['OPENSEARCH_PASSWORD'], port=9200, batch_size = 10, 
embedded_columns=['title','text'])\n",
+    "    )\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c75493ee-e17f-447d-aaac-0c4b0f20d977",
+   "metadata": {},
+   "source": [
+    "### Pipeline Steps:\n",
+    "Now that we have ingested the documents in OpenSearch, we will create an 
embeddings transform, which is used for storing the text and its embedding in 
the OpenSearch vector DB"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f546c5fe-44c7-4900-8e36-4dba3784fe2e",
+   "metadata": {},
+   "source": [
+    "# Running Search Queries/ Perform Enrichment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bcb0d1e2-0f46-47d3-9e08-e3ac15a589ed",
+   "metadata": {},
+   "source": [
+    "### Pipeline Steps:\n",
+    "Create a search transform, which emits the document ID and vector score 
along with the matching text from the knowledge base"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "021c77ad-9df7-438a-9fe1-3599fa8c02a2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Missing pipeline option (runner). Executing pipeline using 
the default runner: DirectRunner.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "WARNING:root:This output type hint will be ignored and not used for 
type-checking purposes. Typically, output type hints for a PTransform are 
single (or nested) types wrapped by a PCollection, PDone, or None. Got: 
Union[Tuple[apache_beam.pvalue.PCollection[~MLTransformOutputT], 
apache_beam.pvalue.PCollection[apache_beam.pvalue.Row]], 
apache_beam.pvalue.PCollection[~MLTransformOutputT]] instead.\n",
+      "INFO:apache_beam.runners.worker.statecache:Creating state cache with 
size 104857600\n",
+      "INFO:sentence_transformers.SentenceTransformer:Load pretrained 
SentenceTransformer: all-MiniLM-L6-v2\n",
+      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: 
mps\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "17eda44920fd403394c94fbc90598b6a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py:1095: 
InsecureRequestWarning: Unverified HTTPS request is being made to host 
'localhost'. Adding certificate verification is strongly advised. See: 
https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
+      "  warnings.warn(\n",
+      "INFO:opensearch:POST https://localhost:9200/embeddings-index/_search 
[status:200 request:0.315s]\n",
+      "INFO:opensearch_enrichment:Enrichment_results\n",
+      "INFO:root:BatchElements statistics: element_count=1 batch_count=1 
next_batch_size=1 timings=[]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Row(text=[...], docs={'took': 268, 'timed_out': False, '_shards': 
{'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': 
{'value': 1, 'relation': 'eq'}, 'max_score': 0.49309593, 'hits': [{'_index': 
'embeddings-index', '_id': '39', '_score': 0.49309593, '_source': {'url': 
'https://en.wikipedia.org/wiki/Albedo', 'title': 'Albedo', 'text': 'climate 
forcing climatology electromagnetic radiation meteorological quantities 
radiometry scattering, absorption and radiat [...]
+     ]
+    }
+   ],
+   "source": [
+    "# Enrichment Pipeline\n",
+    "\n",
+    "data = [{'text':'What is climate ?'}]\n",
+    "\n",
+    "artifact_location = tempfile.mkdtemp()\n",
+    "generate_embedding_fn = 
SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2',\n",
+    "                                                                
columns=['text'])\n",
+    "\n",
+    "opensearch_handler = 
OpenSearchEnrichmentHandler(opensearch_host='https://localhost', \n",
+    "                                                 opensearch_port=9200, 
\n",
+    "                                                 
username=os.environ['OPENSEARCH_USERNAME'], \n",
+    "                                                 
password=os.environ['OPENSEARCH_PASSWORD'])\n",
+    "                                       \n",
+    "\n",
+    "with beam.Pipeline() as p:\n",
+    "  _ = (\n",
+    "      p\n",
+    "      | \"Create\" >> beam.Create(data)\n",
+    "      | \"Generate Embedding\" >> 
MLTransform(write_artifact_location=artifact_location).with_transform(generate_embedding_fn)\n",
+    "      | \"Enrich W/ OpenSearch\" >> Enrichment(opensearch_handler)\n",
+    "      | \"Print\" >> beam.Map(print)\n",
+    "  )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f92d730b-2c34-4c3e-a8a5-5b37a19eb3c4",
+   "metadata": {},
+   "source": [
+    "# Conclusion\n",
+    "Here we have demonstrated how we can implement Ingestion and Enrichment 
pipelines using the OpenSearch vector DB by using MLTransform's 
SentenceTransformerEmbeddings for generating the embeddings of the text chunks."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Reply via email to