From Michael Blow <[email protected]>: Michael Blow has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21044?usp=email )
Change subject: [NO ISSUE][HYR][MISC] Add annotation for citing AI contributions ...................................................................... [NO ISSUE][HYR][MISC] Add annotation for citing AI contributions - user model changes: no - storage format changes: no - interface changes: no Add a repeatable @AiProvenance annotation and supporting enums to record which AI model and interface produced or assisted with a piece of code. Generated-by: OpenAI GPT (5.3 via browser, 5-Mini via Copilot) Ext-ref: MB-71077 Change-Id: I00907d92d85ba9460b183794704a06ff73612329 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21044 Reviewed-by: Ian Maxon <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Ritik Raj <[email protected]> --- A hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/annotations/AiProvenance.java 1 file changed, 310 insertions(+), 0 deletions(-) Approvals: Jenkins: Verified Ritik Raj: Looks good to me, approved Ian Maxon: Looks good to me, approved Objections: Anon. E. Moose #1000171: Violations found diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/annotations/AiProvenance.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/annotations/AiProvenance.java new file mode 100644 index 0000000..bee45b8 --- /dev/null +++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/annotations/AiProvenance.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hyracks.util.annotations; + +import static org.apache.hyracks.util.annotations.AiProvenance.AiProvenances; +import static org.apache.hyracks.util.annotations.AiProvenance.Agent.GPT_5_3; +import static org.apache.hyracks.util.annotations.AiProvenance.Agent.GPT_5_MINI; +import static org.apache.hyracks.util.annotations.AiProvenance.ContributionKind.GENERATED; +import static org.apache.hyracks.util.annotations.AiProvenance.Tool.CHATGPT_UI; +import static org.apache.hyracks.util.annotations.AiProvenance.Tool.GITHUB_COPILOT; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Repeatable; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation used to record AI provenance metadata for a program element. + * + * <p>This annotation may be applied to types, methods, constructors and fields + * to indicate that the element was generated or assisted by an AI model. 
/**
 * Records AI provenance metadata for a program element.
 *
 * <p>The annotation may be applied to types, methods, constructors and fields
 * to indicate that the element was generated or assisted by an AI model. It
 * records which {@link Agent} (model), which {@link Tool} (invocation
 * surface) and the kind of contribution via {@link ContributionKind}; an
 * optional free-text {@link #notes()} element defaults to the empty string.</p>
 *
 * <p>The annotation is repeatable through the {@link AiProvenances} container,
 * which allows several provenance records to be attached to the same program
 * element (for example: an initial draft generated by one model and a later
 * refinement by another).</p>
 *
 * <p><b>Retention.</b> This annotation and its container deliberately share
 * the same retention policy. If the annotation were SOURCE-retained while the
 * container was RUNTIME-retained, a single occurrence would be discarded by
 * the compiler whereas two or more occurrences on the same element would be
 * wrapped in the container and kept in the class file, so visibility would
 * depend on how many times the annotation happened to be repeated.</p>
 *
 * <p>Example:</p>
 * <pre>
 * import static org.apache.hyracks.util.annotations.AiProvenance.Agent.*;
 * import static org.apache.hyracks.util.annotations.AiProvenance.Tool.*;
 * import static org.apache.hyracks.util.annotations.AiProvenance.ContributionKind.*;
 *
 * {@literal @}AiProvenance(agent = GPT_5_MINI, tool = OPENAI_API, contributionKind = GENERATED,
 *         notes = "Initial PoC helper generated by GPT-5 Mini")
 * public class ExampleGeneratedClass { ... }
 * </pre>
 */
@AiProvenance(
        agent = AiProvenance.Agent.GPT_5_3,
        tool = AiProvenance.Tool.CHATGPT_UI,
        contributionKind = AiProvenance.ContributionKind.GENERATED,
        notes = "Initial implementation generated via GPT-5.3 (browser)")
@AiProvenance(
        agent = AiProvenance.Agent.GPT_5_MINI,
        tool = AiProvenance.Tool.GITHUB_COPILOT,
        contributionKind = AiProvenance.ContributionKind.GENERATED,
        notes = "Refinements / Javadocs generated via GPT-5 Mini (GitHub Copilot)")
// Same policy as the AiProvenances container below; keep the two in sync.
@Retention(RetentionPolicy.RUNTIME)
@Target({ ElementType.TYPE, ElementType.METHOD, ElementType.CONSTRUCTOR, ElementType.FIELD })
@Repeatable(AiProvenance.AiProvenances.class)
public @interface AiProvenance {

    /**
     * The AI model that produced or assisted with the annotated element.
     */
    Agent agent();

    /**
     * The surface (UI, IDE integration, API, CLI, ...) through which the model was invoked.
     */
    Tool tool();

    /**
     * The kind of contribution the model made.
     */
    ContributionKind contributionKind();

    /**
     * Optional free-form remarks; defaults to the empty string.
     */
    String notes() default "";

    /**
     * Classifies what the AI model contributed to the annotated element.
     */
    enum ContributionKind {
        /**
         * The element was fully generated by an AI model.
         */
        GENERATED,

        /**
         * The element was assisted or suggested by an AI model and requires
         * human review or modification.
         */
        ASSISTED,

        /**
         * The element was refactored or rewritten by an AI model (not newly
         * generated from scratch).
         */
        REFACTORED,

        /**
         * The element is a test that was generated by an AI model.
         */
        TEST_GENERATED,

        /**
         * The element is documentation that was generated by an AI model.
         */
        DOC_GENERATED
    }

    /**
     * AI models that can be cited. Each constant carries a provider
     * identifier, a model identifier and a display name; use {@link #OTHER}
     * for a model that has no dedicated constant.
     */
    enum Agent {
        // OpenAI - GPT-5.x family
        GPT_5_4("openai", "gpt-5.4", "GPT-5.4"),
        GPT_5_3("openai", "gpt-5.3", "GPT-5.3"),
        GPT_5_2("openai", "gpt-5.2", "GPT-5.2"),
        GPT_5_MINI("openai", "gpt-5-mini", "GPT-5 Mini"),
        GPT_5_1_CODEX("openai", "gpt-5.1-codex", "GPT-5.1 Codex"),
        GPT_5_3_CODEX("openai", "gpt-5.3-codex", "GPT-5.3 Codex"),

        // OpenAI - GPT-4.x family
        GPT_4_1("openai", "gpt-4.1", "GPT-4.1"),
        GPT_4_1_MINI("openai", "gpt-4.1-mini", "GPT-4.1 Mini"),
        GPT_4_1_NANO("openai", "gpt-4.1-nano", "GPT-4.1 Nano"),

        GPT_4O("openai", "gpt-4o", "GPT-4o"),
        GPT_4O_MINI("openai", "gpt-4o-mini", "GPT-4o Mini"),

        // OpenAI - reasoning (o-series)
        O1("openai", "o1", "o1"),
        O1_MINI("openai", "o1-mini", "o1 Mini"),

        O3("openai", "o3", "o3"),
        O3_MINI("openai", "o3-mini", "o3 Mini"),

        O4("openai", "o4", "o4"),
        O4_MINI("openai", "o4-mini", "o4 Mini"),

        // Anthropic - Claude Opus
        CLAUDE_OPUS_4("anthropic", "claude-4-opus", "Claude Opus 4"),
        CLAUDE_OPUS_4_6("anthropic", "claude-4-opus-4.6", "Claude Opus 4.6"),

        CLAUDE_OPUS_3("anthropic", "claude-3-opus", "Claude Opus 3"),

        // Anthropic - Claude Sonnet
        CLAUDE_SONNET_4("anthropic", "claude-4-sonnet", "Claude Sonnet 4"),
        CLAUDE_SONNET_4_5("anthropic", "claude-4-sonnet-4.5", "Claude Sonnet 4.5"),
        CLAUDE_SONNET_4_6("anthropic", "claude-4-sonnet-4.6", "Claude Sonnet 4.6"),

        CLAUDE_SONNET_3("anthropic", "claude-3-sonnet", "Claude Sonnet 3"),
        CLAUDE_SONNET_3_5("anthropic", "claude-3-5-sonnet", "Claude Sonnet 3.5"),

        // Anthropic - Claude Haiku
        CLAUDE_HAIKU_4("anthropic", "claude-4-haiku", "Claude Haiku 4"),
        CLAUDE_HAIKU_3("anthropic", "claude-3-haiku", "Claude Haiku 3"),

        // Google
        GEMINI_1_5_PRO("google", "gemini-1.5-pro", "Gemini 1.5 Pro"),
        GEMINI_1_5_FLASH("google", "gemini-1.5-flash", "Gemini 1.5 Flash"),

        // Meta
        LLAMA_3_70B("meta", "llama-3-70b", "LLaMA 3 70B"),
        LLAMA_3_8B("meta", "llama-3-8b", "LLaMA 3 8B"),

        // Mistral
        MISTRAL_LARGE("mistral", "mistral-large", "Mistral Large"),
        MISTRAL_SMALL("mistral", "mistral-small", "Mistral Small"),
        MIXTRAL_8X7B("mistral", "mixtral-8x7b", "Mixtral 8x7B"),

        // Cohere
        COMMAND_R_PLUS("cohere", "command-r-plus", "Command R+"),
        COMMAND_R("cohere", "command-r", "Command R"),

        // OSS / local
        DEEPSEEK_CHAT("deepseek", "deepseek-chat", "DeepSeek Chat"),
        DEEPSEEK_CODER("deepseek", "deepseek-coder", "DeepSeek Coder"),
        QWEN_2("alibaba", "qwen-2", "Qwen 2"),
        PHI_3("microsoft", "phi-3", "Phi-3"),

        // Fallback
        OTHER("other", "other", "Other");

        private final String provider;
        private final String modelId;
        private final String displayName;

        Agent(String provider, String modelId, String displayName) {
            this.provider = provider;
            this.modelId = modelId;
            this.displayName = displayName;
        }

        /**
         * Returns the provider identifier (e.g. "openai", "anthropic").
         */
        public String provider() {
            return provider;
        }

        /**
         * Returns the model identifier recorded for this constant
         * (for example "gpt-5.4" or "claude-4-opus-4.6").
         */
        public String modelId() {
            return modelId;
        }

        /**
         * Returns the human-friendly display name of the model.
         */
        public String displayName() {
            return displayName;
        }
    }

    /**
     * Container annotation for repeatable {@link AiProvenance} entries.
     *
     * <p>The compiler wraps repeated {@link AiProvenance} annotations on one
     * element in this container. Its retention and targets mirror those of
     * {@link AiProvenance} so that an element annotated once and an element
     * annotated several times are retained identically.</p>
     */
    @Retention(RetentionPolicy.RUNTIME)
    @Target({ ElementType.TYPE, ElementType.METHOD, ElementType.CONSTRUCTOR, ElementType.FIELD })
    @interface AiProvenances {
        /**
         * The contained provenance entries.
         */
        AiProvenance[] value();
    }

    /**
     * Surfaces through which a model can be invoked. Each constant carries a
     * provider identifier, a tool identifier and a display name; use
     * {@link #OTHER} for a tool that has no dedicated constant.
     */
    enum Tool {

        // Web / Chat UIs
        CHATGPT_UI("openai", "chatgpt-ui", "ChatGPT"),
        CLAUDE_UI("anthropic", "claude-ui", "Claude UI"),
        GEMINI_UI("google", "gemini-ui", "Gemini UI"),
        PERPLEXITY("perplexity", "perplexity", "Perplexity"),

        // IDE integrations
        GITHUB_COPILOT("github", "copilot", "GitHub Copilot"),
        GEMINI_CODE_ASSIST("google", "gemini-code-assist", "Gemini Code Assist"),
        CURSOR("cursor", "cursor", "Cursor"),
        WINDSURF("windsurf", "windsurf", "Windsurf"),
        INTELLIJ_AI_ASSISTANT("jetbrains", "ai-assistant", "JetBrains AI Assistant"),
        VSCODE_AI_EXTENSION("microsoft", "vscode-ai", "VS Code AI Extension"),

        // APIs / SDK usage
        OPENAI_API("openai", "api", "OpenAI API"),
        ANTHROPIC_API("anthropic", "api", "Anthropic API"),
        GOOGLE_AI_API("google", "api", "Google AI API"),
        GENERIC_API("generic", "api", "Generic API"),

        // Agent / orchestration platforms
        FACTORY("factory", "factory-ai", "Factory.ai"),
        LANGCHAIN("langchain", "langchain", "LangChain"),
        LLAMAINDEX("llamaindex", "llamaindex", "LlamaIndex"),
        CUSTOM_AGENT("custom", "custom-agent", "Custom Agent Runtime"),

        // CLI tools
        OPENAI_CLI("openai", "cli", "OpenAI CLI"),
        ANTHROPIC_CLI("anthropic", "cli", "Anthropic CLI"),
        FACTORY_CLI("factory", "factory-cli", "Factory.ai CLI"),

        // Fallback
        OTHER("other", "other", "Other");

        private final String provider;
        private final String id;
        private final String displayName;

        Tool(String provider, String id, String displayName) {
            this.provider = provider;
            this.id = id;
            this.displayName = displayName;
        }

        /**
         * Returns the provider identifier (e.g. "openai", "anthropic").
         */
        public String provider() {
            return provider;
        }

        /**
         * Returns an identifier for the specific tool or integration
         * (for example "api", "chatgpt-ui" or "copilot"). Identifiers are
         * not unique on their own: several providers share "api" and "cli".
         */
        public String id() {
            return id;
        }

        /**
         * Returns the human-friendly display name of the tool.
         */
        public String displayName() {
            return displayName;
        }

        /**
         * Returns the provider and id joined by a slash, for example
         * "factory/factory-cli".
         */
        public String qualifiedName() {
            return provider + "/" + id;
        }
    }
}
// --
// To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21044?usp=email
// To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email
//
// Gerrit-MessageType: merged
// Gerrit-Project: asterixdb
// Gerrit-Branch: phoenix
// Gerrit-Change-Id: I00907d92d85ba9460b183794704a06ff73612329
// Gerrit-Change-Number: 21044
// Gerrit-PatchSet: 6
// Gerrit-Owner: Michael Blow <[email protected]>
// Gerrit-Reviewer: Anon. E. Moose #1000171
// Gerrit-Reviewer: Ian Maxon <[email protected]>
// Gerrit-Reviewer: Jenkins <[email protected]>
// Gerrit-Reviewer: Ritik Raj <[email protected]>
