This is an automated email from the ASF dual-hosted git repository. ferruzzi pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 4d0c7242bc Small refactor for example_bedrock_knowledge_base.py (#39672) 4d0c7242bc is described below commit 4d0c7242bcc8a403c03edd993b8b445a51720492 Author: D. Ferruzzi <ferru...@amazon.com> AuthorDate: Thu May 16 14:43:48 2024 -0700 Small refactor for example_bedrock_knowledge_base.py (#39672) - Renamed example_bedrock_retrieve_and_generate.py, a more accurate and descriptive name - Added an example for invoking Claude models and added that to the docs --- .../operators/bedrock.rst | 34 ++++++++++++++-------- .../operators/opensearchserverless.rst | 2 +- ...py => example_bedrock_retrieve_and_generate.py} | 27 ++++++++++++----- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/docs/apache-airflow-providers-amazon/operators/bedrock.rst b/docs/apache-airflow-providers-amazon/operators/bedrock.rst index 1808f5138c..daf9301565 100644 --- a/docs/apache-airflow-providers-amazon/operators/bedrock.rst +++ b/docs/apache-airflow-providers-amazon/operators/bedrock.rst @@ -46,7 +46,10 @@ Invoke an existing Amazon Bedrock Model To invoke an existing Amazon Bedrock model, you can use :class:`~airflow.providers.amazon.aws.operators.bedrock.BedrockInvokeModelOperator`. -Note that every model family has different input and output formats. +Note that every model family has different input and output formats. Some examples are included below, but +for details on the different formats, see +`Inference parameters for foundation models <https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html>`__ + For example, to invoke a Meta Llama model you would use: .. 
exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock.py @@ -63,7 +66,14 @@ To invoke an Amazon Titan model you would use: :start-after: [START howto_operator_invoke_titan_model] :end-before: [END howto_operator_invoke_titan_model] -For details on the different formats, see `Inference parameters for foundation models <https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html>`__ +To invoke a Claude V2 model using the Completions API you would use: + +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py + :language: python + :dedent: 4 + :start-after: [START howto_operator_invoke_claude_model] + :end-before: [END howto_operator_invoke_claude_model] + .. _howto/operator:BedrockCustomizeModelOperator: @@ -119,7 +129,7 @@ To create an Amazon Bedrock Knowledge Base, you can use For more information on which models support embedding data into a vector store, see https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-supported.html -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_operator_bedrock_create_knowledge_base] @@ -132,7 +142,7 @@ Delete an Amazon Bedrock Knowledge Base Deleting a Knowledge Base is a simple boto API call and can be done in a TaskFlow task like the example below. -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. 
exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :start-after: [START howto_operator_bedrock_delete_knowledge_base] :end-before: [END howto_operator_bedrock_delete_knowledge_base] @@ -145,7 +155,7 @@ Create an Amazon Bedrock Data Source To create an Amazon Bedrock Data Source, you can use :class:`~airflow.providers.amazon.aws.operators.bedrock.BedrockCreateDataSourceOperator`. -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_operator_bedrock_create_data_source] @@ -158,7 +168,7 @@ Delete an Amazon Bedrock Data Source Deleting a Data Source is a simple boto API call and can be done in a TaskFlow task like the example below. -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :start-after: [START howto_operator_bedrock_delete_data_source] :end-before: [END howto_operator_bedrock_delete_data_source] @@ -171,7 +181,7 @@ Ingest data into an Amazon Bedrock Data Source To add data from an Amazon S3 bucket into an Amazon Bedrock Data Source, you can use :class:`~airflow.providers.amazon.aws.operators.bedrock.BedrockIngestDataOperator`. -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. 
exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_operator_bedrock_ingest_data] @@ -191,7 +201,7 @@ would like to pass the results through an LLM in order to generate a text respon For more information on which models support retrieving information from a knowledge base, see https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-supported.html -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_operator_bedrock_retrieve] @@ -213,7 +223,7 @@ NOTE: Support for "external sources" was added in boto 1.34.90 Example using an Amazon Bedrock Knowledge Base: -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_operator_bedrock_knowledge_base_rag] @@ -221,7 +231,7 @@ Example using an Amazon Bedrock Knowledge Base: Example using a PDF file in an Amazon S3 Bucket: -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_operator_bedrock_external_sources_rag] @@ -268,7 +278,7 @@ Wait for an Amazon Bedrock Knowledge Base To wait on the state of an Amazon Bedrock Knowledge Base until it reaches a terminal state you can use :class:`~airflow.providers.amazon.aws.sensors.bedrock.BedrockKnowledgeBaseActiveSensor` -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. 
exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_sensor_bedrock_knowledge_base_active] @@ -282,7 +292,7 @@ Wait for an Amazon Bedrock ingestion job to finish To wait on the state of an Amazon Bedrock data ingestion job until it reaches a terminal state you can use :class:`~airflow.providers.amazon.aws.sensors.bedrock.BedrockIngestionJobSensor` -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_sensor_bedrock_ingest_data] diff --git a/docs/apache-airflow-providers-amazon/operators/opensearchserverless.rst b/docs/apache-airflow-providers-amazon/operators/opensearchserverless.rst index f947c91e30..4fb0b4db93 100644 --- a/docs/apache-airflow-providers-amazon/operators/opensearchserverless.rst +++ b/docs/apache-airflow-providers-amazon/operators/opensearchserverless.rst @@ -46,7 +46,7 @@ Wait for an Amazon OpenSearch Serverless Collection to become active To wait on the state of an Amazon Bedrock customize model job until it reaches a terminal state you can use :class:`~airflow.providers.amazon.aws.sensors.bedrock.OpenSearchServerlessCollectionActiveSensor` -.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +.. 
exampleinclude:: /../../tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py :language: python :dedent: 4 :start-after: [START howto_sensor_opensearch_collection_active] diff --git a/tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py b/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py similarity index 96% rename from tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py rename to tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py index b0a333382a..c544cee7f8 100644 --- a/tests/system/providers/amazon/aws/example_bedrock_knowledge_base.py +++ b/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py @@ -44,6 +44,7 @@ from airflow.providers.amazon.aws.operators.bedrock import ( BedrockCreateDataSourceOperator, BedrockCreateKnowledgeBaseOperator, BedrockIngestDataOperator, + BedrockInvokeModelOperator, BedrockRaGOperator, BedrockRetrieveOperator, ) @@ -61,12 +62,14 @@ from airflow.utils.helpers import chain from airflow.utils.trigger_rule import TriggerRule from tests.system.providers.amazon.aws.utils import SystemTestContextBuilder -########################################################################################################### +####################################################################### # NOTE: -# The account running this test must first manually request access to the `Titan Embeddings G1 - Text` -# and `Anthropic Claude v2.0` foundation models via the Bedrock console. Gaining access to the models -# can take 24 hours from the time of request. 
-###########################################################################################################
+# Access to the following foundation model must be requested via
+# the Amazon Bedrock console and may take up to 24 hours to apply:
+#######################################################################
+
+CLAUDE_MODEL_ID = "anthropic.claude-v2"
+TITAN_MODEL_ID = "amazon.titan-embed-text-v1"
 
 # Externally fetched variables:
 ROLE_ARN_KEY = "ROLE_ARN"
@@ -462,11 +465,20 @@ with DAG(
     )
     # [END howto_sensor_opensearch_collection_active]
 
+    PROMPT = "What color is an orange?"
+    # [START howto_operator_invoke_claude_model]
+    invoke_claude_completions = BedrockInvokeModelOperator(
+        task_id="invoke_claude_completions",
+        model_id=CLAUDE_MODEL_ID,
+        input_data={"max_tokens_to_sample": 4000, "prompt": f"\n\nHuman: {PROMPT}\n\nAssistant:"},
+    )
+    # [END howto_operator_invoke_claude_model]
+
     # [START howto_operator_bedrock_create_knowledge_base]
     create_knowledge_base = BedrockCreateKnowledgeBaseOperator(
         task_id="create_knowledge_base",
         name=knowledge_base_name,
-        embedding_model_arn=f"arn:aws:bedrock:{region_name}::foundation-model/amazon.titan-embed-text-v1",
+        embedding_model_arn=f"arn:aws:bedrock:{region_name}::foundation-model/{TITAN_MODEL_ID}",
         role_arn=test_context[ROLE_ARN_KEY],
         storage_config={
             "type": "OPENSEARCH_SERVERLESS",
@@ -522,7 +534,7 @@ with DAG(
         task_id="knowledge_base_rag",
         input="Who was the CEO of Amazon on 2022?",
         source_type="KNOWLEDGE_BASE",
-        model_arn=f"arn:aws:bedrock:{region_name}::foundation-model/anthropic.claude-v2",
+        model_arn=f"arn:aws:bedrock:{region_name}::foundation-model/{CLAUDE_MODEL_ID}",
         knowledge_base_id=create_knowledge_base.output,
     )
     # [END howto_operator_bedrock_knowledge_base_rag]
@@ -552,6 +564,7 @@ with DAG(
         create_vector_index(index_name=index_name, collection_id=collection, region=region_name),
         copy_data_to_s3(bucket=bucket_name),
         # TEST BODY
+        invoke_claude_completions,
         create_knowledge_base,
         await_knowledge_base,
         create_data_source,