This is an automated email from the ASF dual-hosted git repository.

lostluck pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new bdd29bf45f8 Add script to fetch SCIO examples (#26891)
bdd29bf45f8 is described below

commit bdd29bf45f818ce0c20c111c9787d5051c1b792b
Author: Timur Sultanov <timur.sulta...@akvelon.com>
AuthorDate: Wed May 31 22:37:52 2023 +0400

    Add script to fetch SCIO examples (#26891)
---
 playground/infrastructure/fetch_scala_examples.py | 245 ++++++++++++++++++++++
 playground/infrastructure/requirements.txt        |   2 +
 2 files changed, 247 insertions(+)

diff --git a/playground/infrastructure/fetch_scala_examples.py 
b/playground/infrastructure/fetch_scala_examples.py
new file mode 100644
index 00000000000..6a8682c8a10
--- /dev/null
+++ b/playground/infrastructure/fetch_scala_examples.py
@@ -0,0 +1,245 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from dataclasses import dataclass
+from typing import List, Tuple
+
+import argparse
+import requests
+import re
+
+from models import ComplexityEnum
+
+SCIO_REPOSITORY = "https://raw.githubusercontent.com/spotify/scio/";
+SCIO_BRANCH = "main"
+
+
+@dataclass
+class ScioExampleTag:
+    filepath: str
+    name: str
+    description: str
+    multifile: bool
+    pipeline_options: str
+    default_example: bool
+    context_line: int
+    categories: List[str]
+    complexity: ComplexityEnum
+    tags: List[str]
+
+
+SCIO_EXAMPLES: List[ScioExampleTag] = [
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/DebuggingWordCount.scala",
+        name="DebuggingWordCount",
+        description="Word Count Example with Assertions.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Debugging", "Filtering", "Options", "Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+        ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/extra/MetricsExample.scala",
+        name="MetricsExample",
+        description="Metrics example.",
+        multifile=False,
+        pipeline_options="",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/MinimalWordCount.scala",
+        name="MinimalWordCount",
+        description="An example that counts words in Shakespeare's works.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=True,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.BASIC,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/extra/SafeFlatMapExample.scala",
+        name="SafeFlatMapExample",
+        description="SafeFlatMap usage",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/extra/StatefulExample.scala",
+        name="StatefulExample",
+        description="Stateful Processing.",
+        multifile=False,
+        pipeline_options="",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/complete/TfIdf.scala",
+        name="TfIdf",
+        description="Compute TF-IDF from a Text Corpus.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/extra/TsvExample.scala",
+        name="TsvExampleWrite",
+        description="Reading and writing tsv data.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/WordCount.scala",
+        name="WordCount",
+        description="An example that counts words in Shakespeare's works.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Options", "Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/extra/WordCountScioIO.scala",
+        name="WordCountScioIO",
+        description="Word Count Example with Metrics and ScioIO read/write.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+    ScioExampleTag(
+        
filepath="scio-examples/src/main/scala/com/spotify/scio/examples/extra/WriteDynamicExample.scala",
+        name="WriteDynamicExample",
+        description="Demonstrates saveAsDynamic* methods.",
+        multifile=False,
+        pipeline_options="--output output.txt",
+        default_example=False,
+        context_line=1,
+        categories=["Combiners", "Filtering", "IO", "Core Transforms", 
"Quickstart"],
+        complexity=ComplexityEnum.MEDIUM,
+        tags=["Example"]
+    ),
+]
+
+
+def fetch_scala_examples() -> Tuple[ScioExampleTag, str]:
+    """Fetch all Scala examples from the Scio repository."""
+    urls = [(example, SCIO_REPOSITORY + "/" + SCIO_BRANCH + "/" + 
example.filepath) for example in SCIO_EXAMPLES]
+    for example, url in urls:
+        result = requests.get(url)
+        if result.status_code != 200:
+            print(f"Failed to fetch {url} with status code 
{result.status_code}, skipping")
+            continue
+        content = result.text
+        yield example, content
+
+
+def serialize_tag_to_yaml(tag: ScioExampleTag) -> str:
+    """Serialize a Tag to YAML."""
+
+    yaml = f"""beam-playground:
+    name: "{tag.name}"
+    description: "{tag.description}"
+    multifile: {tag.multifile}
+    pipeline_options: "{tag.pipeline_options}"
+    default_example: {tag.default_example}
+    context_line: {tag.context_line}
+    categories:
+"""
+    for category in tag.categories:
+        yaml += f"        - \"{category}\"\n"
+    yaml += f"    complexity: {tag.complexity}\n"
+    yaml += f"    tags:\n"
+    for t in tag.tags:
+        yaml += f"        - \"{t}\"\n"
+    return yaml
+
+
+def insert_tag_into_source(tag_yaml: str, source: str) -> str:
+    """Insert the tag YAML into the source code."""
+
+    lines = source.split("\n")
+    package_line = 0
+    for i, line in enumerate(lines):
+        if line.startswith("package"):
+            package_line = i
+            break
+
+    object_line = 0
+    for i, line in enumerate(lines):
+        if line.startswith("object"):
+            object_line = i
+            break
+
+    tag_lines_number = tag_yaml.count("\n")
+    tag_yaml = re.sub(r"context_line: \d+", f"context_line: {object_line + 
tag_lines_number + 3}", tag_yaml)
+    tag_yaml = "// " + tag_yaml.replace("\n", "\n// ")
+    tag_yaml = "\n" + tag_yaml
+
+    # Insert the tag YAML before the package definition
+    lines.insert(package_line, tag_yaml)
+
+    return "\n".join(lines)
+
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument("--output-dir", dest="output_dir", help="Output 
directory", required=True)
+
+if __name__ == "__main__":
+    args = argparser.parse_args()
+
+    if not os.path.exists(args.output_dir):
+        os.makedirs(args.output_dir)
+
+    for tag, source in fetch_scala_examples():
+        tag_yaml = serialize_tag_to_yaml(tag)
+        source = insert_tag_into_source(tag_yaml, source)
+
+        output_path = os.path.join(args.output_dir, tag.name + ".scala")
+
+        with open(output_path, "w") as f:
+            f.write(source)
+
+        print(f"Written {output_path}")
diff --git a/playground/infrastructure/requirements.txt 
b/playground/infrastructure/requirements.txt
index 936b077129d..a5023df8d0a 100644
--- a/playground/infrastructure/requirements.txt
+++ b/playground/infrastructure/requirements.txt
@@ -26,3 +26,5 @@ pydantic==1.10.2
 grpcio-tools==1.51.1
 protobuf==4.21.12
 google-cloud-datastore==2.11.0
+
+requests~=2.28.2

Reply via email to