This is an automated email from the ASF dual-hosted git repository.

hanahmily pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/skywalking-banyandb-client-proto.git
commit 374f5f8b7d105438fb2968cc36b55da3fdad488b Author: Gao Hongtao <[email protected]> AuthorDate: Fri Dec 19 06:49:00 2025 +0000 Update README.md to enhance documentation on BanyanDB client proto definitions, including sync process, prerequisites, and project structure. --- .asf.yml | 42 ++ .github/workflows/ci.yml | 87 ++++ .github/workflows/sync-proto.yml | 89 ++++ .gitignore | 10 + .licenserc.yaml | 35 ++ .mvn/wrapper/maven-wrapper.jar | Bin 0 -> 62547 bytes .mvn/wrapper/maven-wrapper.properties | 2 + HEADER | 14 + Makefile | 77 ++++ README.md | 122 +++++- mvnw | 29 ++ pom.xml | 107 +++++ scripts/sync_proto.py | 769 ++++++++++++++++++++++++++++++++++ 13 files changed, 1377 insertions(+), 6 deletions(-) diff --git a/.asf.yml b/.asf.yml new file mode 100644 index 0000000..ea8e4a8 --- /dev/null +++ b/.asf.yml @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +github: + description: BanyanDB public client Protocol Buffer / gRPC definitions + homepage: https://skywalking.apache.org/ + labels: + - skywalking + - observability + - apm + - distributed-tracing + - metrics + - logging + - database + - time-series + enabled_merge_buttons: + squash: true + merge: false + rebase: false + protected_branches: + main: + required_status_checks: + strict: true + contexts: + - CI + required_pull_request_reviews: + dismiss_stale_reviews: true + required_approving_review_count: 1 \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b271909 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: CI + +on: + push: + branches: [ main ] + pull_request: + workflow_dispatch: + +jobs: + check-license-header: + name: Check License Header + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Check License Header + uses: apache/skywalking-eyes/header@20da317d1ad158e79e24355fdc28f53370e94c8a + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + verify-proto: + name: Generate and Compile Java from Proto + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + cache: maven + + - name: Cache Maven dependencies + uses: actions/cache@v4 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + + - name: Make Maven wrapper executable + if: hashFiles('mvnw') != '' + run: chmod +x mvnw + + - name: Generate Java code from proto files + run: | + if [ -f mvnw ]; then + ./mvnw clean compile + else + mvn clean compile + fi + + - name: Verify compilation succeeded + run: | + if [ ! -d "target/classes" ]; then + echo "Error: Java classes were not generated/compiled" + exit 1 + fi + echo "✓ Java code generated and compiled successfully" + echo "Generated classes location: target/classes" + + +ci: + name: CI + runs-on: ubuntu-latest + needs: [ check-license-header, verify-proto ] + steps: + - run: echo 'Success' diff --git a/.github/workflows/sync-proto.yml b/.github/workflows/sync-proto.yml new file mode 100644 index 0000000..cf82c70 --- /dev/null +++ b/.github/workflows/sync-proto.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Sync Proto Files + +on: + workflow_dispatch: + inputs: + branch: + description: 'Branch or tag to sync from' + required: false + default: 'main' + type: string + +jobs: + sync-proto: + name: Sync Proto Files from Upstream + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Configure Git + run: | + git config --local user.email "[email protected]" + git config --local user.name "GitHub Action" + + - name: Sync proto files + id: sync + run: | + BRANCH="${{ github.event.inputs.branch || 'main' }}" + echo "Syncing proto files from branch: $BRANCH" + python3 scripts/sync_proto.py --branch "$BRANCH" --force + + # Check if there are any changes + if [ -n "$(git status --porcelain)" ]; then + echo "changes=true" >> $GITHUB_OUTPUT + echo "Proto files have been updated" + git status --short + else + echo "changes=false" >> $GITHUB_OUTPUT + echo "No changes detected" + fi + + - name: Create Pull Request + if: steps.sync.outputs.changes == 'true' + uses: peter-evans/create-pull-request@v6 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: "chore: sync proto files from upstream" + title: "chore: Sync proto files from upstream" + body: | + This PR automatically syncs proto files from the Apache SkyWalking BanyanDB repository. + + **Source branch:** `${{ github.event.inputs.branch || 'main' }}` + + Please review the changes and merge if they look correct. + branch: sync-proto-files + delete-branch: true + labels: | + automated + proto-sync + draft: false + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfe8acf --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Generated Java files +target/generated-sources/**/*.java +target/ + +# OS files +.DS_Store +Thumbs.db + +# Cursor +.cursor/ diff --git a/.licenserc.yaml b/.licenserc.yaml new file mode 100644 index 0000000..a545a99 --- /dev/null +++ b/.licenserc.yaml @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +header: + license: + spdx-id: Apache-2.0 + copyright-owner: Apache Software Foundation + + paths-ignore: + - '.github/PULL_REQUEST_TEMPLATE' + - '**/.gitignore' + - '.gitmodules' + - '.mvn' + - '**/*.md' + - '**/*.json' + - '**/target/**' + - 'LICENSE' + - 'NOTICE' + + comment: on-failure diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 0000000..cb28b0e Binary files /dev/null and b/.mvn/wrapper/maven-wrapper.jar differ diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 0000000..097e771 --- /dev/null +++ b/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,2 @@ +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar diff --git a/HEADER b/HEADER new file mode 100644 index 0000000..0ed96c4 --- /dev/null +++ b/HEADER @@ -0,0 +1,14 @@ +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4bfc9ec --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# BanyanDB Client Proto - Makefile + +# Binaries +MVNW := ./mvnw + +.PHONY: all check-versions generate compile clean sync-proto sync-proto-dry-run + +all: compile + +# Check versions +check-versions: + @echo "Checking installed versions..." + @echo "Required: JDK $(JDK_VERSION)" + @echo "" + @if command -v javac >/dev/null 2>&1; then \ + echo "javac version:"; \ + javac -version 2>&1 || $(JAVAC) -version 2>&1 || echo " Not found"; \ + else \ + echo "javac: Not found"; \ + fi + +# Generate Java code and compile using Maven +compile: + @echo "Generating Java code and compiling using Maven..." + @if [ ! -f pom.xml ]; then \ + echo "Error: pom.xml not found. 
Please ensure pom.xml exists in the project root."; \ + exit 1; \ + fi + @if [ -f $(MVNW) ]; then \ + MVN_CMD=$(MVNW); \ + elif command -v mvn >/dev/null 2>&1; then \ + MVN_CMD=mvn; \ + else \ + echo "Error: Maven not found. Please run 'make install-mvnw' first or install Maven."; \ + exit 1; \ + fi; \ + echo "Using Maven: $$MVN_CMD"; \ + $$MVN_CMD clean compile || exit 1; \ + echo "Java code generated and compiled successfully." + +# Clean generated files and Maven build artifacts +clean: + @if [ -f $(MVNW) ] || command -v mvn >/dev/null 2>&1; then \ + if [ -f $(MVNW) ]; then \ + $(MVNW) clean 2>/dev/null || true; \ + elif command -v mvn >/dev/null 2>&1; then \ + mvn clean 2>/dev/null || true; \ + fi; \ + fi + @echo "Clean completed." + +# Sync proto files from Apache SkyWalking BanyanDB repository +sync-proto: + @echo "Syncing proto files from Apache SkyWalking BanyanDB..." + @python3 scripts/sync_proto.py + +# Dry run: Preview proto file changes without writing +sync-proto-dry-run: + @echo "Dry run: Previewing proto file changes..." + @python3 scripts/sync_proto.py --dry-run + diff --git a/README.md b/README.md index b36dd3b..e446d2f 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,130 @@ -BanyanDB Client Proto Definitions -========== +# BanyanDB Client Proto Definitions <img src="http://skywalking.apache.org/assets/logo.svg" alt="Sky Walking logo" height="90px" align="right" /> -Protocol definitions in Protobuf/gRPC for BanyanDB clients +This repo contains the **BanyanDB public client Protocol Buffer / gRPC definitions**, plus a small toolchain to: -# Code of conduct +- **Sync** protos from the upstream BanyanDB repo (`apache/skywalking-banyandb`) +- **Normalize and merge** upstream protos into a stable client-facing layout under `proto/banyandb/v1/` +- **Generate + compile Java** from the synced protos to verify the sync is valid (also enforced in CI) + +## Upstream source of truth + +Protos are synced from the Apache SkyWalking BanyanDB repository: + +- **Repo**: `apache/skywalking-banyandb` +- **Path**: `api/proto/banyandb/<module>/v1/*.proto` + +Locally, this repo maintains a consolidated, client-friendly layout: + +- `proto/banyandb/v1/banyandb-common.proto` +- `proto/banyandb/v1/banyandb-database.proto` +- `proto/banyandb/v1/banyandb-measure.proto` +- `proto/banyandb/v1/banyandb-model.proto` +- `proto/banyandb/v1/banyandb-property.proto` +- `proto/banyandb/v1/banyandb-stream.proto` +- `proto/banyandb/v1/banyandb-trace.proto` + +These files are produced by `scripts/sync_proto.py`, which also rewrites imports to match the merged layout. + +## Prerequisites + +- **Python**: 3.x (for `scripts/sync_proto.py`) +- **Java**: JDK 17 (for compilation verification) +- **Maven**: use `./mvnw` (recommended) or a system `mvn` + +If you use `./mvnw` and hit a permission error, run: + +```bash +chmod +x mvnw +``` + +## Sync protos + +### Preview changes (dry-run) + +```bash +make sync-proto-dry-run +``` + +Or directly (with extra options): + +```bash +python3 scripts/sync_proto.py --branch main --dry-run +``` + +### Apply changes + +Interactive (asks for confirmation): + +```bash +make sync-proto +``` + +Non-interactive (useful in automation): + +```bash +python3 scripts/sync_proto.py --branch main --force +``` + +### Sync only specific module(s) + +```bash +python3 scripts/sync_proto.py --branch main --module common --module measure --force +``` + +Valid modules are: `common`, `database`, `measure`, `model`, `property`, `stream`, `trace`. 
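+
+### How imports are rewritten
+
+The sync script rewrites upstream import paths so they point at the consolidated files. A minimal sketch of that rewrite rule (the helper name below is illustrative; the actual logic lives in `transform_import_path` inside `scripts/sync_proto.py`):
+
+```python
+import re
+
+def rewrite_import(line: str) -> str:
+    # banyandb/<module>/v1/<file>.proto -> banyandb/v1/banyandb-<module>.proto
+    return re.sub(r'banyandb/([^/]+)/v1/[^"]+\.proto',
+                  lambda m: f'banyandb/v1/banyandb-{m.group(1)}.proto',
+                  line)
+
+print(rewrite_import('import "banyandb/model/v1/query.proto";'))
+# -> import "banyandb/v1/banyandb-model.proto";
+```
+
+Self-imports (references back to the module being merged) are dropped rather than rewritten.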
+ +### Sync via GitHub Actions (opens a PR) + +Run the workflow “Sync Proto Files” (`.github/workflows/sync-proto.yml`) with an optional `branch` input (defaults to `main`). + +## Verify the sync (generate + compile) + +The fastest correctness check after syncing is to **generate Java from the protos and compile**: + +```bash +make compile +``` + +This runs `mvn clean compile` (preferring `./mvnw` if present), which: + +- Generates Java sources from `proto/**/*.proto` into `target/generated-sources/` +- Compiles them into `target/classes/` + +This is also what CI runs in “Verify Proto Files” (`.github/workflows/verify-proto.yml`). + +## Clean + +```bash +make clean +``` + +## Project structure + +``` +. +├── proto/ +│ └── banyandb/ +│ └── v1/ # Consolidated (synced) proto files +├── scripts/ +│ └── sync_proto.py # Sync + merge tool (pulls from upstream GitHub) +├── pom.xml # Java compile verification (protoc + javac via Maven) +├── mvnw # Maven wrapper (preferred) +├── Makefile # Convenience targets +└── README.md +``` + +## Code of conduct This project adheres to the Contributor Covenant [code of conduct](https://www.apache.org/foundation/policies/conduct). By participating, you are expected to uphold this code. Please follow the [REPORTING GUIDELINES](https://www.apache.org/foundation/policies/conduct#reporting-guidelines) to report unacceptable behavior. -# Contact Us +## Contact Us * Mail list: **[email protected]**. Mail to `[email protected]`, follow the reply to subscribe the mail list. * Send `Request to join SkyWalking slack` mail to the mail list(`[email protected]`), we will invite you in. * Twitter, [ASFSkyWalking](https://twitter.com/ASFSkyWalking) * QQ Group: 901167865(Recommended), 392443393 * [bilibili B站 视频](https://space.bilibili.com/390683219) -# License +## License [Apache 2.0 License.](LICENSE) \ No newline at end of file diff --git a/mvnw b/mvnw new file mode 100755 index 0000000..6b1ef4f --- /dev/null +++ b/mvnw @@ -0,0 +1,29 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# ---------------------------------------------------------------------------- + +APP_HOME=$(cd "$(dirname "$0")" && pwd) +CLASSPATH="$APP_HOME/.mvn/wrapper/maven-wrapper.jar" +if [ -n "$JAVA_HOME" ]; then + JAVACMD="$JAVA_HOME/bin/java" +else + JAVACMD="java" +fi +cd "$APP_HOME" || { echo "Failed to cd to $APP_HOME" >&2; exit 1; } +exec "$JAVACMD" -Dmaven.multiModuleProjectDirectory="$APP_HOME" -classpath "$CLASSPATH" org.apache.maven.wrapper.MavenWrapperMain "$@" diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..6d0b722 --- /dev/null +++ b/pom.xml @@ -0,0 +1,107 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + ~ + --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>org.apache.skywalking</groupId> + <artifactId>banyandb-client-proto</artifactId> + <version>1.0.0</version> + <packaging>jar</packaging> + + <name>BanyanDB Client Proto</name> + <description>Protocol definitions in Protobuf/gRPC for BanyanDB clients</description> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <maven.compiler.source>17</maven.compiler.source> + <maven.compiler.target>17</maven.compiler.target> + <protobuf.version>4.28.3</protobuf.version> + </properties> + + <dependencies> + <dependency> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + <version>${protobuf.version}</version> + </dependency> + <dependency> + <groupId>build.buf.protoc-gen-validate</groupId> + <artifactId>pgv-java-stub</artifactId> + <version>1.0.2</version> + </dependency> + </dependencies> + + <build> + <sourceDirectory>target/generated-sources</sourceDirectory> + <plugins> + <plugin> + <groupId>org.xolstice.maven.plugins</groupId> + <artifactId>protobuf-maven-plugin</artifactId> + <version>0.6.1</version> + <configuration> + <protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact> + <protoSourceRoot>${project.basedir}/proto</protoSourceRoot> + <includes> + <include>**/*.proto</include> + </includes> + <includeStdTypes>true</includeStdTypes> + <includeDependencies>true</includeDependencies> + <additionalProtoPathElements> + <additionalProtoPathElement>${project.basedir}</additionalProtoPathElement> + </additionalProtoPathElements> + </configuration> + <executions> + <execution> + <goals> + <goal>compile</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.11.0</version> + 
<configuration> + <source>17</source> + <target>17</target> + <encoding>UTF-8</encoding> + </configuration> + </plugin> + <plugin> + <groupId>kr.motd.maven</groupId> + <artifactId>os-maven-plugin</artifactId> + <version>1.7.1</version> + <executions> + <execution> + <phase>initialize</phase> + <goals> + <goal>detect</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> + diff --git a/scripts/sync_proto.py b/scripts/sync_proto.py new file mode 100755 index 0000000..e8115cf --- /dev/null +++ b/scripts/sync_proto.py @@ -0,0 +1,769 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Proto File Sync Script for Apache SkyWalking BanyanDB + +This script syncs proto files from the Apache SkyWalking BanyanDB repository +and intelligently merges multiple remote files into consolidated local files. +""" + +import argparse +import os +import re +import sys +from typing import Dict, List, Set, Tuple +from urllib.request import urlopen +from urllib.error import URLError + +# ANSI color codes for terminal output +class Colors: + RESET = '\033[0m' + RED = '\033[91m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + BLUE = '\033[94m' + MAGENTA = '\033[95m' + CYAN = '\033[96m' + BOLD = '\033[1m' + +# Configuration: Module mapping +MODULES = { + 'common': {'files': 'all'}, + 'database': {'files': ['schema.proto', 'rpc.proto']}, + 'measure': {'files': 'all'}, + 'model': {'files': 'all'}, + 'property': {'files': ['property.proto', 'rpc.proto']}, + 'stream': {'files': 'all'}, + 'trace': {'files': 'all'} +} + +# Exclusion list: Messages and RPCs to exclude from merged files +# Organized by module. Each module can have 'messages' and 'rpcs' lists. 
+EXCLUDE_LIST = { + 'common': { + 'messages': [], + 'rpcs': [] + }, + 'database': { + 'messages': [], + 'rpcs': [] + }, + 'measure': { + 'messages': ['DeleteExpiredSegmentsRequest', 'DeleteExpiredSegmentsResponse', 'InternalWriteRequest'], + 'rpcs': ['DeleteExpiredSegments'] + }, + 'model': { + 'messages': [], + 'rpcs': [] + }, + 'property': { + 'messages': ['InternalUpdateRequest', 'InternalDeleteRequest', 'InternalQueryResponse', 'InternalRepairRequest', 'InternalRepairResponse'], + 'rpcs': [] + }, + 'stream': { + 'messages': ['DeleteExpiredSegmentsRequest', 'DeleteExpiredSegmentsResponse', 'InternalWriteRequest'], + 'rpcs': ['DeleteExpiredSegments'] + }, + 'trace': { + 'messages': ['DeleteExpiredSegmentsRequest', 'DeleteExpiredSegmentsResponse', 'InternalWriteRequest'], + 'rpcs': ['DeleteExpiredSegments'] + } +} + +# Skip patterns: Patterns and options to skip during proto parsing +# - line_prefixes: Lines starting with these strings will be skipped +# - line_contains: Lines containing these strings will be skipped +# - option_blocks: Option blocks that require brace tracking (multi-line) +# - import_contains: Import statements containing these strings will be skipped +SKIP_PATTERNS = { + 'line_prefixes': [ + 'option go_package', + ], + 'line_contains': [ + 'option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_swagger)', + ], + 'option_blocks': [ + 'option (google.api.http)', + ], + 'import_contains': [ + 'google/api/annotations.proto', + 'protoc-gen-openapiv2/options/annotations.proto', + ], +} + +# GitHub repository configuration +GITHUB_REPO = "apache/skywalking-banyandb" +GITHUB_RAW_BASE = "https://raw.githubusercontent.com" +REMOTE_PROTO_PATH = "api/proto/banyandb" + + +def fetch_directory_listing(branch: str, module: str) -> List[str]: + """ + Fetch the list of proto files in a remote directory using GitHub API. + Falls back to a predefined list if API fails. + """ + # Try GitHub API first + api_url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{REMOTE_PROTO_PATH}/{module}/v1?ref={branch}" + + try: + import json + with urlopen(api_url) as response: + data = json.loads(response.read().decode('utf-8')) + proto_files = [item['name'] for item in data if item['name'].endswith('.proto')] + return sorted(proto_files) + except Exception as e: + print(f"{Colors.YELLOW}Warning: Could not fetch directory listing via API: {e}{Colors.RESET}") + print(f"{Colors.YELLOW}Trying common file names...{Colors.RESET}") + + # Fallback: Try common file names + common_names = ['rpc.proto', 'write.proto', 'query.proto', 'schema.proto', + 'topn.proto', 'model.proto', 'common.proto'] + found_files = [] + for filename in common_names: + url = f"{GITHUB_RAW_BASE}/{GITHUB_REPO}/{branch}/{REMOTE_PROTO_PATH}/{module}/v1/{filename}" + try: + with urlopen(url) as response: + if response.status == 200: + found_files.append(filename) + except: + pass + + if found_files: + return sorted(found_files) + else: + raise Exception(f"Could not determine proto files for module '{module}'") + + +def fetch_proto_file(branch: str, module: str, filename: str) -> str: + """Fetch a single proto file from GitHub.""" + url = f"{GITHUB_RAW_BASE}/{GITHUB_REPO}/{branch}/{REMOTE_PROTO_PATH}/{module}/v1/{filename}" + + try: + with urlopen(url) as response: + return response.read().decode('utf-8') + except URLError as e: + raise Exception(f"Failed to fetch {url}: {e}") + + +def parse_proto_file(content: str) -> Dict[str, any]: + """ + Parse a proto file into structured components. 
+ Returns a dict with: license, syntax, java_package, package, imports, body + """ + lines = content.split('\n') + result = { + 'license': [], + 'syntax': None, + 'java_package': None, + 'package': None, + 'imports': [], + 'body': [] + } + + license_done = False + syntax_done = False + package_done = False + in_http_option = False + brace_depth = 0 + + for i, line in enumerate(lines): + stripped = line.strip() + + # Detect license header - collect all consecutive comment lines from the start + if not license_done: + # Check if this is a comment line (// or /* */ style) + is_comment = (stripped.startswith('//') or + stripped.startswith('/*') or + (stripped.startswith('*') and i > 0 and '/*' in lines[i-1])) + + if is_comment: + result['license'].append(line) + continue + elif not stripped: + # Empty line after comments - license is done + license_done = True + continue + else: + # Non-comment, non-empty line - license must have ended + license_done = True + # Fall through to process this line + + # Skip empty lines after license but before syntax + if license_done and not syntax_done and not stripped: + continue + + # Parse syntax + if not syntax_done and stripped.startswith('syntax ='): + result['syntax'] = line + syntax_done = True + continue + + # Parse java_package option + if stripped.startswith('option java_package'): + if not result['java_package']: + result['java_package'] = line + continue + + # Skip lines matching line_prefixes patterns + if any(stripped.startswith(prefix) for prefix in SKIP_PATTERNS['line_prefixes']): + continue + + # Skip lines matching line_contains patterns + if any(pattern in stripped for pattern in SKIP_PATTERNS['line_contains']): + continue + + # Track and skip option blocks (multi-line options requiring brace tracking) + # Check if we're starting a new option block (can appear after syntax) + if syntax_done: + for option_pattern in SKIP_PATTERNS['option_blocks']: + if option_pattern in stripped: + # Always skip the line when we detect the pattern + brace_depth = stripped.count('{') - stripped.count('}') + if brace_depth > 0: + # Multi-line option, start tracking + in_http_option = True + # For single-line options, brace_depth <= 0, so we just skip and continue + continue + + if in_http_option: + # We're inside an option block, track braces + brace_depth += stripped.count('{') - stripped.count('}') + if brace_depth <= 0: + # Reached the end of the option block + in_http_option = False + continue + + # Parse package declaration + if not package_done and stripped.startswith('package '): + result['package'] = line + package_done = True + continue + + # Parse imports (skip patterns in import_contains) + if stripped.startswith('import '): + if not any(pattern in line for pattern in SKIP_PATTERNS['import_contains']): + result['imports'].append(line) + continue + + # Everything else is body (skip if we're in an option block being tracked) + if syntax_done and package_done and not in_http_option: + result['body'].append(line) + + return result + + +def transform_import_path(import_line: str) -> str: + """ + Transform import paths from old format to new merged format. 
+ banyandb/{module}/v1/{file}.proto -> banyandb/v1/banyandb-{module}.proto + + Example: + import "banyandb/common/v1/common.proto"; -> import "banyandb/v1/banyandb-common.proto"; + import "banyandb/model/v1/query.proto"; -> import "banyandb/v1/banyandb-model.proto"; + """ + import re + + # Pattern to match: banyandb/{module}/v1/{file}.proto + # Extract the module name and replace the entire path + def replace_import(match): + module = match.group(1) + return f'banyandb/v1/banyandb-{module}.proto' + + # Replace the path pattern in the import statement + new_line = re.sub( + r'banyandb/([^/]+)/v1/[^"]+\.proto', + replace_import, + import_line + ) + + return new_line + + +def filter_excluded_definitions(body_lines: List[str], exclude_messages: List[str], exclude_rpcs: List[str]) -> List[str]: + """ + Filter out excluded messages and RPCs from the body. + + Args: + body_lines: The body lines to filter + exclude_messages: List of message names to exclude + exclude_rpcs: List of RPC names to exclude + + Returns: + Filtered body lines with excluded definitions removed + """ + if not body_lines or (not exclude_messages and not exclude_rpcs): + return body_lines + + result = [] + i = 0 + skip_until_close = False + brace_depth = 0 + + while i < len(body_lines): + line = body_lines[i] + stripped = line.strip() + + # Check if we're currently skipping a definition + if skip_until_close: + # Track brace depth to know when the definition ends + brace_depth += stripped.count('{') - stripped.count('}') + + if brace_depth <= 0: + # Definition ended, stop skipping + skip_until_close = False + brace_depth = 0 + + i += 1 + continue + + # Check for excluded messages + if exclude_messages and stripped.startswith('message '): + # Extract message name + match = re.match(r'message\s+(\w+)', stripped) + if match: + message_name = match.group(1) + if message_name in exclude_messages: + # Start skipping this message definition + skip_until_close = True + brace_depth = stripped.count('{') - stripped.count('}') + i += 1 + continue + + # Check for excluded RPCs + if exclude_rpcs and stripped.startswith('rpc '): + # Extract RPC name + match = re.match(r'rpc\s+(\w+)', stripped) + if match: + rpc_name = match.group(1) + if rpc_name in exclude_rpcs: + # Check if this is a single-line RPC (ends with ;) or multi-line (has {) + if stripped.endswith(';'): + # Single-line RPC, skip just this line + i += 1 + continue + elif '{' in stripped: + # Multi-line RPC, start skipping + skip_until_close = True + brace_depth = stripped.count('{') - stripped.count('}') + i += 1 + continue + + # Keep this line + result.append(line) + i += 1 + + return result + + +def remove_options_from_rpc_blocks(body_lines: List[str]) -> List[str]: + """ + Remove option lines from inside RPC method blocks and convert to single-line RPCs if only options remain. + Removes lines matching patterns in SKIP_PATTERNS['option_blocks'] from within RPC definitions. 
+ Converts: + rpc Query(QueryRequest) returns (QueryResponse) { + option (google.api.http) = {...}; + } + to: + rpc Query(QueryRequest) returns (QueryResponse); + """ + if not body_lines: + return body_lines + + result = [] + i = 0 + while i < len(body_lines): + line = body_lines[i] + stripped = line.strip() + + # Check if this is an RPC line ending with { + if 'rpc ' in stripped and stripped.endswith('{'): + rpc_line = line + j = i + 1 + block_content = [] + option_count = 0 + + # Scan through the RPC block and collect non-option lines + while j < len(body_lines): + block_line = body_lines[j] + block_stripped = block_line.strip() + + # Check if we hit another RPC or service/message definition (malformed block) + if (block_stripped.startswith('rpc ') or + block_stripped.startswith('service ') or + block_stripped.startswith('message ')): + # Malformed RPC block (no proper closing brace), convert to single-line + rpc_single_line = rpc_line.rstrip().rstrip('{').rstrip() + result.append(rpc_single_line + ';') + # Continue from this line (don't skip it) + i = j + break + + # Check if we've reached a closing brace + if block_stripped == '}': + # This could be the RPC block's closing brace OR the service's closing brace + # If we only found options (and empty lines), it's likely the RPC's closing brace + # If we found no options at all, it might be the service's closing brace (malformed RPC) + + has_non_empty_content = any(line.strip() for line in block_content) + + if option_count > 0 or has_non_empty_content: + # We found content in this block, so this } belongs to the RPC + if has_non_empty_content: + # Keep the block with non-option content + result.append(rpc_line) + result.extend(block_content) + result.append(block_line) + else: + # Only had options (now removed), convert to single-line + rpc_single_line = rpc_line.rstrip().rstrip('{').rstrip() + result.append(rpc_single_line + ';') + i = j + 1 + else: + # No options and no content found, this } likely belongs to service (malformed RPC) + rpc_single_line = rpc_line.rstrip().rstrip('{').rstrip() + result.append(rpc_single_line + ';') + # Don't consume the }, continue from it + i = j + break + + # Check if this line is an option we want to remove + is_skip_option = any(pattern in block_stripped for pattern in SKIP_PATTERNS['option_blocks']) + + if is_skip_option: + option_count += 1 + else: + # Keep lines that are not skippable options (including empty lines) + block_content.append(block_line) + + j += 1 + else: + # Reached end of file without closing brace, convert to single-line + rpc_single_line = rpc_line.rstrip().rstrip('{').rstrip() + result.append(rpc_single_line + ';') + i += 1 + continue + + result.append(line) + i += 1 + + return result + + +def cleanup_empty_rpc_blocks(body_lines: List[str]) -> List[str]: + """ + Remove empty {} blocks from RPC definitions. 
+ Converts: + rpc Query(QueryRequest) returns (QueryResponse) { + } + to: + rpc Query(QueryRequest) returns (QueryResponse); + """ + if not body_lines: + return body_lines + + result = [] + i = 0 + while i < len(body_lines): + line = body_lines[i] + stripped = line.strip() + + # Check if this is an RPC line ending with { + if 'rpc ' in stripped and stripped.endswith('{'): + # Look ahead to see if the next non-empty line is just } + j = i + 1 + # Skip empty lines + while j < len(body_lines) and not body_lines[j].strip(): + j += 1 + + if j < len(body_lines) and body_lines[j].strip() == '}': + # Found empty RPC block, remove the { and the } + # Replace the rpc line to remove the { at the end + rpc_line = line.rstrip().rstrip('{').rstrip() + result.append(rpc_line + ';') + # Skip the closing } + i = j + 1 + continue + + result.append(line) + i += 1 + + return result + + +def merge_proto_files(proto_contents: List[str], current_module: str = None, exclude_messages: List[str] = None, exclude_rpcs: List[str] = None) -> str: + """ + Intelligently merge multiple proto files into one. + - Keep one license header + - Keep one syntax declaration + - Keep one java_package option + - Keep one package declaration + - Merge and deduplicate imports + - Concatenate all body content + """ + if not proto_contents: + return "" + + parsed_files = [parse_proto_file(content) for content in proto_contents] + + # Build the merged content + merged = [] + + # 1. License header (from first file) + if parsed_files[0]['license']: + merged.extend(parsed_files[0]['license']) + merged.append('') + + # 2. Syntax declaration (from first file) + if parsed_files[0]['syntax']: + merged.append(parsed_files[0]['syntax']) + merged.append('') + + # 3. Java package option (from first file) + if parsed_files[0]['java_package']: + merged.append(parsed_files[0]['java_package']) + merged.append('') + + # 4. Package declaration (from first file) + if parsed_files[0]['package']: + merged.append(parsed_files[0]['package']) + merged.append('') + + # 5. Merge, transform, and deduplicate imports + all_imports: Set[str] = set() + for parsed in parsed_files: + for imp in parsed['imports']: + # Transform the import path to the new merged file format + transformed_imp = transform_import_path(imp.strip()) + + # Filter out self-imports (imports that reference the same module being merged) + if current_module and f'banyandb/v1/banyandb-{current_module}.proto' in transformed_imp: + continue + + all_imports.add(transformed_imp) + + if all_imports: + # Sort imports: google first, then validate, then banyandb + sorted_imports = sorted(all_imports, key=lambda x: ( + 0 if 'google/' in x else (1 if 'validate/' in x else 2), + x + )) + merged.extend(sorted_imports) + merged.append('') + + # 6. 
Concatenate bodies + for i, parsed in enumerate(parsed_files): + if parsed['body']: + # Remove leading empty lines from body + body_lines = parsed['body'] + while body_lines and not body_lines[0].strip(): + body_lines.pop(0) + + # Remove trailing empty lines from body + while body_lines and not body_lines[-1].strip(): + body_lines.pop() + + # Filter out patterns defined in SKIP_PATTERNS (secondary filter for body content) + body_lines = [line for line in body_lines + if not any(line.strip().startswith(prefix) for prefix in SKIP_PATTERNS['line_prefixes']) + and not any(pattern in line.strip() for pattern in SKIP_PATTERNS['line_contains'])] + + # Filter out excluded messages and RPCs + if exclude_messages or exclude_rpcs: + body_lines = filter_excluded_definitions( + body_lines, + exclude_messages or [], + exclude_rpcs or [] + ) + + # Remove option blocks from inside RPC methods + body_lines = remove_options_from_rpc_blocks(body_lines) + + # Clean up empty RPC blocks + body_lines = cleanup_empty_rpc_blocks(body_lines) + + if body_lines: + if i > 0: + # Add separator between files + merged.append('') + merged.extend(body_lines) + + # Remove trailing empty lines + while merged and not merged[-1].strip(): + merged.pop() + + # Ensure file ends with newline + return '\n'.join(merged) + '\n' + + +def sync_module(branch: str, module: str, config: Dict, dry_run: bool = False) -> Tuple[bool, str]: + """ + Sync a single module. + Returns (changed, message) tuple. + """ + print(f"{Colors.CYAN}Processing module: {module}{Colors.RESET}") + + # Determine which files to fetch + if config['files'] == 'all': + try: + proto_files = fetch_directory_listing(branch, module) + except Exception as e: + return False, f"{Colors.RED}Error: {e}{Colors.RESET}" + else: + proto_files = config['files'] + + print(f" Files to sync: {', '.join(proto_files)}") + + # Fetch all proto files + fetched_contents = [] + for filename in proto_files: + try: + print(f" Fetching {filename}...", end=' ') + content = fetch_proto_file(branch, module, filename) + fetched_contents.append(content) + print(f"{Colors.GREEN}✓{Colors.RESET}") + except Exception as e: + print(f"{Colors.RED}✗{Colors.RESET}") + return False, f"{Colors.RED}Error fetching {filename}: {e}{Colors.RESET}" + + # Get exclusion lists for this module + exclude_config = EXCLUDE_LIST.get(module, {}) + exclude_messages = exclude_config.get('messages', []) + exclude_rpcs = exclude_config.get('rpcs', []) + + if exclude_messages or exclude_rpcs: + print(f" Applying exclusions: {len(exclude_messages)} messages, {len(exclude_rpcs)} RPCs") + + # Merge proto files + print(f" Merging {len(fetched_contents)} files...") + merged_content = merge_proto_files( + fetched_contents, + current_module=module, + exclude_messages=exclude_messages, + exclude_rpcs=exclude_rpcs + ) + + # Determine output path + output_path = f"proto/banyandb/v1/banyandb-{module}.proto" + + # Check if file exists and compare + file_exists = os.path.exists(output_path) + changed = False + + if file_exists: + with open(output_path, 'r', encoding='utf-8') as f: + existing_content = f.read() + changed = existing_content != merged_content + else: + changed = True + + if changed: + print(f" {Colors.YELLOW}Changes detected{Colors.RESET}") + + if not dry_run: + # Write the merged file + os.makedirs(os.path.dirname(output_path), exist_ok=True) + with open(output_path, 'w', encoding='utf-8') as f: + f.write(merged_content) + status = "updated" if file_exists else "created" + print(f" {Colors.GREEN}✓ File {status}: 
{output_path}{Colors.RESET}") + else: + print(f" {Colors.BLUE}[DRY RUN] Would update: {output_path}{Colors.RESET}") + else: + print(f" {Colors.GREEN}✓ No changes needed{Colors.RESET}") + + return changed, output_path + + +def main(): + parser = argparse.ArgumentParser( + description='Sync proto files from Apache SkyWalking BanyanDB repository' + ) + parser.add_argument( + '--branch', + default='main', + help='Branch or tag to sync from (default: main)' + ) + parser.add_argument( + '--dry-run', + action='store_true', + help='Preview changes without writing files' + ) + parser.add_argument( + '--module', + action='append', + help='Sync only specific module(s). Can be specified multiple times.' + ) + parser.add_argument( + '--force', + action='store_true', + help='Skip confirmation prompts' + ) + + args = parser.parse_args() + + # Determine which modules to sync + modules_to_sync = MODULES + if args.module: + modules_to_sync = {k: v for k, v in MODULES.items() if k in args.module} + if not modules_to_sync: + print(f"{Colors.RED}Error: No valid modules specified{Colors.RESET}") + print(f"Valid modules: {', '.join(MODULES.keys())}") + sys.exit(1) + + # Display configuration + print(f"{Colors.BOLD}=== Proto File Sync ==={Colors.RESET}") + print(f"Repository: {GITHUB_REPO}") + print(f"Branch: {args.branch}") + print(f"Modules: {', '.join(modules_to_sync.keys())}") + print(f"Mode: {'DRY RUN' if args.dry_run else 'LIVE'}") + print() + + # Confirmation prompt + if not args.force and not args.dry_run: + response = input(f"{Colors.YELLOW}Proceed with sync? [y/N]: {Colors.RESET}") + if response.lower() not in ['y', 'yes']: + print("Cancelled.") + sys.exit(0) + print() + + # Sync each module + results = [] + for module, config in modules_to_sync.items(): + changed, message = sync_module(args.branch, module, config, args.dry_run) + results.append((module, changed, message)) + print() + + # Summary + print(f"{Colors.BOLD}=== Summary ==={Colors.RESET}") + changed_count = sum(1 for _, changed, _ in results if changed) + + for module, changed, message in results: + status = f"{Colors.YELLOW}CHANGED{Colors.RESET}" if changed else f"{Colors.GREEN}UNCHANGED{Colors.RESET}" + print(f" {module}: {status}") + + print() + if args.dry_run: + print(f"{Colors.BLUE}Dry run complete. {changed_count} file(s) would be updated.{Colors.RESET}") + else: + print(f"{Colors.GREEN}Sync complete. {changed_count} file(s) updated.{Colors.RESET}") + + +if __name__ == '__main__': + main() +
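For a quick local check of the merge pipeline, `sync_proto.py` can also be imported as a module (only definitions run at import time). A minimal sketch, assuming the repository root as the working directory and a made-up proto snippet:

```python
# Hypothetical local experiment; run from the repository root.
import sys

sys.path.insert(0, "scripts")
import sync_proto  # scripts/sync_proto.py

sample = '''syntax = "proto3";

package banyandb.stream.v1;

import "banyandb/model/v1/query.proto";

message InternalWriteRequest {
  string shard_id = 1;
}

message QueryRequest {
  string name = 1;
}
'''

merged = sync_proto.merge_proto_files(
    [sample],
    current_module="stream",
    exclude_messages=["InternalWriteRequest"],
)
print(merged)
# The import is rewritten to banyandb/v1/banyandb-model.proto and
# InternalWriteRequest is dropped; only QueryRequest remains.
```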
