damccorm commented on code in PR #24529:
URL: https://github.com/apache/beam/pull/24529#discussion_r1040118843
##########
examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb:
##########
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ "source": [
+ "##### Copyright 2022 Google Inc.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\").\n",
+ "<!--\n",
+ " Licensed to the Apache Software Foundation (ASF) under one\n",
+ " or more contributor license agreements. See the NOTICE file\n",
+ " distributed with this work for additional information\n",
+ " regarding copyright ownership. The ASF licenses this file\n",
+ " to you under the Apache License, Version 2.0 (the\n",
+ " \"License\"); you may not use this file except in compliance\n",
+ " with the License. You may obtain a copy of the License at\n",
+ "\n",
+ " http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ " Unless required by applicable law or agreed to in writing,\n",
+ " software distributed under the License is distributed on an\n",
+ " \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+ " KIND, either express or implied. See the License for the\n",
+ " specific language governing permissions and limitations\n",
+ " under the License.\n",
+ "-->\n"
+ ],
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ "source": [
+ "# RunInference with Sentenced T5 Model from TensorFlow Hub\n",
+ "\n",
+ "\n",
+ "In this notebook, we walk through the use of the RunInference
transform with a [sentence encoder built on T5
model](https://tfhub.dev/google/sentence-t5/st5-base/1) and testing it locally
with Interactive Runner.\n"
+ ],
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ "source": [
+ "## Install and import packages."
+ ],
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "!pip install apache_beam[gcp]==2.41.0\n",
+ "!pip install apache-beam[interactive]==2.41.0\n",
+ "!pip install tensorflow==2.10.0\n",
+ "!pip install tensorflow_text==2.10.0\n",
+ "!pip install keras==2.10.0\n",
+ "!pip install tfx_bsl==1.10.0\n",
+ "!pip install pillow==8.4.0"
+ ],
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import importlib\n",
+ "\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_hub as hub\n",
+ "import tensorflow_text\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "\n",
+ "from typing import Any\n",
+ "from typing import Dict\n",
+ "from typing import Iterable\n",
+ "from typing import Optional\n",
+ "from typing import Sequence\n",
+ "\n",
+ "import apache_beam as beam\n",
+ "import apache_beam.runners.interactive.interactive_beam as ib\n",
+ "\n",
+ "from apache_beam.ml.inference.base import RunInference\n",
+ "from apache_beam.ml.inference.base import ModelHandler\n",
+ "from apache_beam.ml.inference.base import PredictionResult\n",
+ "from apache_beam.internal import pickler\n",
+ "from apache_beam.runners.runner import PipelineResult\n",
+ "from apache_beam.runners.interactive.interactive_runner import
InteractiveRunner\n",
+ "\n",
+ "from tfx_bsl.public.beam.run_inference import CreateModelHandler\n",
+ "from tfx_bsl.public.proto import model_spec_pb2"
+ ],
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ "source": [
+ "## Create a Keras Model from TensorFlow Hub Image"
+ ],
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "r1fgn5I_-mzA"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import auth\n",
+ "auth.authenticate_user()"
+ ],
+ "id": "r1fgn5I_-mzA"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2ff8e394-f577-4dea-bef9-a4f4528c1378"
+ },
+ "outputs": [],
+ "source": [
+ "PROJECT_ID = '<Project Id>'\n",
+ "GCS_BUCKET = '<GCS Bucket>'\n",
+ "\n",
+ "MODEL_PATH = f'{GCS_BUCKET}/st5-base/1'"
+ ],
+ "id": "2ff8e394-f577-4dea-bef9-a4f4528c1378"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ccaede25-1c1a-4ec4-9296-25c9a2ac43d7"
+ },
+ "outputs": [],
+ "source": [
+ "inp = tf.keras.layers.Input(shape=[], dtype=tf.string,
name='input')\n",
+ "hub_url = \"https://tfhub.dev/google/sentence-t5/st5-base/1\"\n",
+ "imported = hub.KerasLayer(hub_url)\n",
+ "outp = imported(inp)\n",
+ "model = tf.keras.Model(inp, outp)\n",
+ "\n",
+ "# Sentenced-T5 model returns a 768-dimensional vector for an English
text input.\n",
+ "# Note the 'input' that we will pass in as example's feature key
name.\n",
+ "model.summary()"
+ ],
+ "id": "ccaede25-1c1a-4ec4-9296-25c9a2ac43d7"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "29803d5b-93b9-41fc-b414-f7c737c5d7bc"
+ },
+ "source": [
+ "## Save the model with a TF function definition for RunInference()"
+ ],
+ "id": "29803d5b-93b9-41fc-b414-f7c737c5d7bc"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "704abfca-5956-4fc1-9717-4c6d5bf2db8e"
+ },
+ "outputs": [],
+ "source": [
+ "RAW_DATA_PREDICT_SPEC = {\n",
+ " 'input': tf.io.FixedLenFeature([], tf.string),\n",
+ "}\n",
+ "\n",
+ "@tf.function(input_signature=[tf.TensorSpec(shape=[None],
dtype=tf.string)])\n",
+ "def call(serialized_examples):\n",
+ " features = tf.io.parse_example(serialized_examples,
RAW_DATA_PREDICT_SPEC)\n",
+ " return model(features)\n",
+ "\n",
+ "tf.saved_model.save(model, MODEL_PATH, signatures={'serving_default':
call})"
+ ],
+ "id": "704abfca-5956-4fc1-9717-4c6d5bf2db8e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7b56569d-e540-44ed-a46a-9cec886522f6"
+ },
+ "source": [
+ "## Create and test a RunInference pipeline locally"
+ ],
+ "id": "7b56569d-e540-44ed-a46a-9cec886522f6"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "fad13b30-b159-425c-8c15-a41301abd3a4"
+ },
+ "outputs": [],
+ "source": [
+ "# Creates TensorFlow Example to feed to the ModelHandler.\n",
+ "class ExampleProcessor:\n",
+ " def create_example(self, feature: tf.string):\n",
+ " return tf.train.Example(\n",
+ " features=tf.train.Features(\n",
+ " feature={'input' :
self.create_feature(feature)})\n",
+ " )\n",
+ "\n",
+ " def create_feature(self, element: tf.string):\n",
+ " return
tf.train.Feature(bytes_list=tf.train.BytesList(value=[element.encode()], ))\n"
+ ],
+ "id": "fad13b30-b159-425c-8c15-a41301abd3a4"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "60380ebd-5bc8-4bc6-9cf4-3030bf687367",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "saved_model_spec =
model_spec_pb2.SavedModelSpec(model_path=MODEL_PATH)\n",
+ "inferece_spec_type =
model_spec_pb2.InferenceSpecType(saved_model_spec=saved_model_spec)\n",
+ "model_handler = CreateModelHandler(inferece_spec_type)\n",
+ "\n",
+ "questions = [\n",
+ " 'what is the official slogan for the 2018 winter olympics?',\n",
+ "]\n",
+ "\n",
+ "pipeline = beam.Pipeline(InteractiveRunner())\n",
+ "\n",
+ "inference = (pipeline | 'CreateSentences' >>
beam.Create(questions)\n",
+ " | 'Convert input to Tensor' >> beam.Map(lambda x:
ExampleProcessor().create_example(x))\n",
+ " | 'RunInference with T5' >>
RunInference(model_handler))"
+ ],
+ "id": "60380ebd-5bc8-4bc6-9cf4-3030bf687367"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "29a84182-baa5-45c4-abcf-d9cab84835c9"
+ },
+ "outputs": [],
+ "source": [
+ "ib.show(inference)"
+ ],
+ "id": "29a84182-baa5-45c4-abcf-d9cab84835c9"
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "AxGlAcYboi3X"
+ },
+ "id": "AxGlAcYboi3X",
+ "execution_count": null,
+ "outputs": []
+ }
Review Comment:
Can we remove this trailing empty code cell? It renders as an extra blank block at the end of the notebook -
https://github.com/apache/beam/blob/3de2f85b3439a7084b60eb600a56bdc4cd36525c/examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb
##########
examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb:
##########
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ "source": [
+ "##### Copyright 2022 Google Inc.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\").\n",
+ "<!--\n",
+ " Licensed to the Apache Software Foundation (ASF) under one\n",
+ " or more contributor license agreements. See the NOTICE file\n",
+ " distributed with this work for additional information\n",
+ " regarding copyright ownership. The ASF licenses this file\n",
+ " to you under the Apache License, Version 2.0 (the\n",
+ " \"License\"); you may not use this file except in compliance\n",
+ " with the License. You may obtain a copy of the License at\n",
+ "\n",
+ " http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ " Unless required by applicable law or agreed to in writing,\n",
+ " software distributed under the License is distributed on an\n",
+ " \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+ " KIND, either express or implied. See the License for the\n",
+ " specific language governing permissions and limitations\n",
+ " under the License.\n",
+ "-->\n"
+ ],
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ "source": [
+ "# RunInference with Sentenced T5 Model from TensorFlow Hub\n",
+ "\n",
+ "\n",
+ "In this notebook, we walk through the use of the RunInference
transform with a [sentence encoder built on T5
model](https://tfhub.dev/google/sentence-t5/st5-base/1) and testing it locally
with Interactive Runner.\n"
+ ],
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ "source": [
+ "## Install and import packages."
+ ],
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "!pip install apache_beam[gcp]==2.41.0\n",
+ "!pip install apache-beam[interactive]==2.41.0\n",
+ "!pip install tensorflow==2.10.0\n",
+ "!pip install tensorflow_text==2.10.0\n",
+ "!pip install keras==2.10.0\n",
+ "!pip install tfx_bsl==1.10.0\n",
+ "!pip install pillow==8.4.0"
+ ],
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import importlib\n",
+ "\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_hub as hub\n",
+ "import tensorflow_text\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "\n",
+ "from typing import Any\n",
+ "from typing import Dict\n",
+ "from typing import Iterable\n",
+ "from typing import Optional\n",
+ "from typing import Sequence\n",
Review Comment:
Could you please pare this down to the minimal set of dependencies used? For
example, I don't think we use any of these `typing` imports.
##########
examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb:
##########
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ "source": [
+ "##### Copyright 2022 Google Inc.\n",
Review Comment:
+1, I don't think we're allowed to accept a Google copyright/license notice into the code base.
##########
examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb:
##########
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ "source": [
+ "##### Copyright 2022 Google Inc.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\").\n",
+ "<!--\n",
+ " Licensed to the Apache Software Foundation (ASF) under one\n",
+ " or more contributor license agreements. See the NOTICE file\n",
+ " distributed with this work for additional information\n",
+ " regarding copyright ownership. The ASF licenses this file\n",
+ " to you under the Apache License, Version 2.0 (the\n",
+ " \"License\"); you may not use this file except in compliance\n",
+ " with the License. You may obtain a copy of the License at\n",
+ "\n",
+ " http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ " Unless required by applicable law or agreed to in writing,\n",
+ " software distributed under the License is distributed on an\n",
+ " \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+ " KIND, either express or implied. See the License for the\n",
+ " specific language governing permissions and limitations\n",
+ " under the License.\n",
+ "-->\n"
+ ],
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ "source": [
+ "# RunInference with Sentenced T5 Model from TensorFlow Hub\n",
+ "\n",
+ "\n",
+ "In this notebook, we walk through the use of the RunInference
transform with a [sentence encoder built on T5
model](https://tfhub.dev/google/sentence-t5/st5-base/1) and testing it locally
with Interactive Runner.\n"
+ ],
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ "source": [
+ "## Install and import packages."
+ ],
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "!pip install apache_beam[gcp]==2.41.0\n",
+ "!pip install apache-beam[interactive]==2.41.0\n",
+ "!pip install tensorflow==2.10.0\n",
+ "!pip install tensorflow_text==2.10.0\n",
+ "!pip install keras==2.10.0\n",
+ "!pip install tfx_bsl==1.10.0\n",
+ "!pip install pillow==8.4.0"
+ ],
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import importlib\n",
+ "\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_hub as hub\n",
+ "import tensorflow_text\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "\n",
+ "from typing import Any\n",
+ "from typing import Dict\n",
+ "from typing import Iterable\n",
+ "from typing import Optional\n",
+ "from typing import Sequence\n",
+ "\n",
+ "import apache_beam as beam\n",
+ "import apache_beam.runners.interactive.interactive_beam as ib\n",
+ "\n",
+ "from apache_beam.ml.inference.base import RunInference\n",
+ "from apache_beam.ml.inference.base import ModelHandler\n",
+ "from apache_beam.ml.inference.base import PredictionResult\n",
+ "from apache_beam.internal import pickler\n",
+ "from apache_beam.runners.runner import PipelineResult\n",
+ "from apache_beam.runners.interactive.interactive_runner import
InteractiveRunner\n",
+ "\n",
+ "from tfx_bsl.public.beam.run_inference import CreateModelHandler\n",
+ "from tfx_bsl.public.proto import model_spec_pb2"
+ ],
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ "source": [
+ "## Create a Keras Model from TensorFlow Hub Image"
+ ],
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "r1fgn5I_-mzA"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import auth\n",
+ "auth.authenticate_user()"
+ ],
+ "id": "r1fgn5I_-mzA"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2ff8e394-f577-4dea-bef9-a4f4528c1378"
+ },
+ "outputs": [],
+ "source": [
+ "PROJECT_ID = '<Project Id>'\n",
+ "GCS_BUCKET = '<GCS Bucket>'\n",
+ "\n",
+ "MODEL_PATH = f'{GCS_BUCKET}/st5-base/1'"
+ ],
+ "id": "2ff8e394-f577-4dea-bef9-a4f4528c1378"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ccaede25-1c1a-4ec4-9296-25c9a2ac43d7"
+ },
+ "outputs": [],
+ "source": [
+ "inp = tf.keras.layers.Input(shape=[], dtype=tf.string,
name='input')\n",
+ "hub_url = \"https://tfhub.dev/google/sentence-t5/st5-base/1\"\n",
+ "imported = hub.KerasLayer(hub_url)\n",
+ "outp = imported(inp)\n",
+ "model = tf.keras.Model(inp, outp)\n",
+ "\n",
+ "# Sentenced-T5 model returns a 768-dimensional vector for an English
text input.\n",
+ "# Note the 'input' that we will pass in as example's feature key
name.\n",
+ "model.summary()"
+ ],
+ "id": "ccaede25-1c1a-4ec4-9296-25c9a2ac43d7"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "29803d5b-93b9-41fc-b414-f7c737c5d7bc"
+ },
+ "source": [
+ "## Save the model with a TF function definition for RunInference()"
+ ],
+ "id": "29803d5b-93b9-41fc-b414-f7c737c5d7bc"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "704abfca-5956-4fc1-9717-4c6d5bf2db8e"
+ },
+ "outputs": [],
+ "source": [
+ "RAW_DATA_PREDICT_SPEC = {\n",
+ " 'input': tf.io.FixedLenFeature([], tf.string),\n",
+ "}\n",
+ "\n",
+ "@tf.function(input_signature=[tf.TensorSpec(shape=[None],
dtype=tf.string)])\n",
+ "def call(serialized_examples):\n",
+ " features = tf.io.parse_example(serialized_examples,
RAW_DATA_PREDICT_SPEC)\n",
+ " return model(features)\n",
+ "\n",
+ "tf.saved_model.save(model, MODEL_PATH, signatures={'serving_default':
call})"
+ ],
+ "id": "704abfca-5956-4fc1-9717-4c6d5bf2db8e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7b56569d-e540-44ed-a46a-9cec886522f6"
+ },
+ "source": [
+ "## Create and test a RunInference pipeline locally"
Review Comment:
I think this is actually somewhat clear if you look at how the notebook
renders - this applies to all remaining sections -
https://github.com/apache/beam/blob/3de2f85b3439a7084b60eb600a56bdc4cd36525c/examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb
##########
examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb:
##########
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ "source": [
+ "##### Copyright 2022 Google Inc.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\").\n",
+ "<!--\n",
+ " Licensed to the Apache Software Foundation (ASF) under one\n",
+ " or more contributor license agreements. See the NOTICE file\n",
+ " distributed with this work for additional information\n",
+ " regarding copyright ownership. The ASF licenses this file\n",
+ " to you under the Apache License, Version 2.0 (the\n",
+ " \"License\"); you may not use this file except in compliance\n",
+ " with the License. You may obtain a copy of the License at\n",
+ "\n",
+ " http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ " Unless required by applicable law or agreed to in writing,\n",
+ " software distributed under the License is distributed on an\n",
+ " \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+ " KIND, either express or implied. See the License for the\n",
+ " specific language governing permissions and limitations\n",
+ " under the License.\n",
+ "-->\n"
+ ],
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ "source": [
+ "# RunInference with Sentenced T5 Model from TensorFlow Hub\n",
+ "\n",
+ "\n",
+ "In this notebook, we walk through the use of the RunInference
transform with a [sentence encoder built on T5
model](https://tfhub.dev/google/sentence-t5/st5-base/1) and testing it locally
with Interactive Runner.\n"
+ ],
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ "source": [
+ "## Install and import packages."
+ ],
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "!pip install apache_beam[gcp]==2.41.0\n",
+ "!pip install apache-beam[interactive]==2.41.0\n",
+ "!pip install tensorflow==2.10.0\n",
+ "!pip install tensorflow_text==2.10.0\n",
+ "!pip install keras==2.10.0\n",
+ "!pip install tfx_bsl==1.10.0\n",
+ "!pip install pillow==8.4.0"
+ ],
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import importlib\n",
+ "\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_hub as hub\n",
+ "import tensorflow_text\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "\n",
+ "from typing import Any\n",
+ "from typing import Dict\n",
+ "from typing import Iterable\n",
+ "from typing import Optional\n",
+ "from typing import Sequence\n",
+ "\n",
+ "import apache_beam as beam\n",
+ "import apache_beam.runners.interactive.interactive_beam as ib\n",
+ "\n",
+ "from apache_beam.ml.inference.base import RunInference\n",
+ "from apache_beam.ml.inference.base import ModelHandler\n",
+ "from apache_beam.ml.inference.base import PredictionResult\n",
+ "from apache_beam.internal import pickler\n",
+ "from apache_beam.runners.runner import PipelineResult\n",
+ "from apache_beam.runners.interactive.interactive_runner import
InteractiveRunner\n",
+ "\n",
+ "from tfx_bsl.public.beam.run_inference import CreateModelHandler\n",
+ "from tfx_bsl.public.proto import model_spec_pb2"
+ ],
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ "source": [
+ "## Create a Keras Model from TensorFlow Hub Image"
+ ],
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "r1fgn5I_-mzA"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import auth\n",
+ "auth.authenticate_user()"
+ ],
+ "id": "r1fgn5I_-mzA"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2ff8e394-f577-4dea-bef9-a4f4528c1378"
+ },
+ "outputs": [],
+ "source": [
+ "PROJECT_ID = '<Project Id>'\n",
+ "GCS_BUCKET = '<GCS Bucket>'\n",
Review Comment:
I think the model eventually gets saved in the GCS_BUCKET, so it should just
be an empty bucket. I agree that adding an explanatory comment would be helpful
though, and maybe changing `<GCS Bucket>` to `gs://<GCS_BUCKET>` so that the
desired formatting is clear.
##########
examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb:
##########
@@ -0,0 +1,307 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ "source": [
+ "##### Copyright 2022 Google Inc.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\").\n",
+ "<!--\n",
+ " Licensed to the Apache Software Foundation (ASF) under one\n",
+ " or more contributor license agreements. See the NOTICE file\n",
+ " distributed with this work for additional information\n",
+ " regarding copyright ownership. The ASF licenses this file\n",
+ " to you under the Apache License, Version 2.0 (the\n",
+ " \"License\"); you may not use this file except in compliance\n",
+ " with the License. You may obtain a copy of the License at\n",
+ "\n",
+ " http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ " Unless required by applicable law or agreed to in writing,\n",
+ " software distributed under the License is distributed on an\n",
+ " \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+ " KIND, either express or implied. See the License for the\n",
+ " specific language governing permissions and limitations\n",
+ " under the License.\n",
+ "-->\n"
+ ],
+ "id": "634de4ce-07f7-4003-8ff6-d3f58b41a79d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ "source": [
+ "# RunInference with Sentenced T5 Model from TensorFlow Hub\n",
+ "\n",
+ "\n",
+ "In this notebook, we walk through the use of the RunInference
transform with a [sentence encoder built on T5
model](https://tfhub.dev/google/sentence-t5/st5-base/1) and testing it locally
with Interactive Runner.\n"
+ ],
+ "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ "source": [
+ "## Install and import packages."
+ ],
+ "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a9"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "!pip install apache_beam[gcp]==2.41.0\n",
+ "!pip install apache-beam[interactive]==2.41.0\n",
+ "!pip install tensorflow==2.10.0\n",
+ "!pip install tensorflow_text==2.10.0\n",
+ "!pip install keras==2.10.0\n",
+ "!pip install tfx_bsl==1.10.0\n",
+ "!pip install pillow==8.4.0"
+ ],
+ "id": "b22e51ad-cbf2-4dfe-a1c6-3ad17d5a3749"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2",
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import importlib\n",
+ "\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_hub as hub\n",
+ "import tensorflow_text\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "\n",
+ "from typing import Any\n",
+ "from typing import Dict\n",
+ "from typing import Iterable\n",
+ "from typing import Optional\n",
+ "from typing import Sequence\n",
+ "\n",
+ "import apache_beam as beam\n",
+ "import apache_beam.runners.interactive.interactive_beam as ib\n",
+ "\n",
+ "from apache_beam.ml.inference.base import RunInference\n",
+ "from apache_beam.ml.inference.base import ModelHandler\n",
+ "from apache_beam.ml.inference.base import PredictionResult\n",
+ "from apache_beam.internal import pickler\n",
+ "from apache_beam.runners.runner import PipelineResult\n",
+ "from apache_beam.runners.interactive.interactive_runner import
InteractiveRunner\n",
+ "\n",
+ "from tfx_bsl.public.beam.run_inference import CreateModelHandler\n",
+ "from tfx_bsl.public.proto import model_spec_pb2"
+ ],
+ "id": "f313a508-59ea-47ed-86eb-c9c8e67785f2"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ "source": [
+ "## Create a Keras Model from TensorFlow Hub Image"
+ ],
+ "id": "74db0203-3d26-4bc4-8271-81fad9756297"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "r1fgn5I_-mzA"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import auth\n",
+ "auth.authenticate_user()"
+ ],
+ "id": "r1fgn5I_-mzA"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2ff8e394-f577-4dea-bef9-a4f4528c1378"
+ },
+ "outputs": [],
+ "source": [
+ "PROJECT_ID = '<Project Id>'\n",
+ "GCS_BUCKET = '<GCS Bucket>'\n",
Review Comment:
Could you also strip off any trailing `/` characters so that
`gs://my/model/path` behaves the same as `gs://my/model/path/`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]