This is an automated email from the ASF dual-hosted git repository. sxnan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/flink.git
commit 5e54fa37a9ab8d8b4b231ee21027fe5105001765 Author: Yunfeng Zhou <[email protected]> AuthorDate: Fri Oct 31 17:35:44 2025 +0800 [hotfix][model] Support automatic document generation --- docs/content.zh/docs/connectors/models/openai.md | 196 +-------------------- docs/content/docs/connectors/models/openai.md | 196 +-------------------- .../generated/model_openai_chat_section.html | 66 +++++++ .../generated/model_openai_common_section.html | 42 +++++ .../generated/model_openai_embedding_section.html | 18 ++ .../shortcodes/generated/openai_configuration.html | 102 +++++++++++ .../flink/annotation/docs/Documentation.java | 6 + flink-docs/pom.xml | 6 + .../docs/util/ConfigurationOptionLocator.java | 4 +- .../model/openai/AbstractOpenAIModelFunction.java | 62 +------ .../flink/model/openai/ContextOverflowAction.java | 12 +- .../model/openai/OpenAIChatModelFunction.java | 86 ++------- .../model/openai/OpenAIEmbeddingModelFunction.java | 12 +- .../model/openai/OpenAIModelProviderFactory.java | 32 ++-- .../apache/flink/model/openai/OpenAIOptions.java | 181 +++++++++++++++++++ 15 files changed, 474 insertions(+), 547 deletions(-) diff --git a/docs/content.zh/docs/connectors/models/openai.md b/docs/content.zh/docs/connectors/models/openai.md index 1632518508e..ec6da302015 100644 --- a/docs/content.zh/docs/connectors/models/openai.md +++ b/docs/content.zh/docs/connectors/models/openai.md @@ -82,205 +82,15 @@ FROM ML_PREDICT( ### 公共选项 -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 25%">参数</th> - <th class="text-center" style="width: 10%">是否必选</th> - <th class="text-center" style="width: 10%">默认值</th> - <th class="text-center" style="width: 10%">数据类型</th> - <th class="text-center" style="width: 45%">描述</th> - </tr> - </thead> - <tbody> - <tr> - <td> - <h5>provider</h5> - </td> - <td>必填</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>指定使用的模型提供方,必须为 'openai'。</td> - </tr> - <tr> - <td> - <h5>endpoint</h5> - </td> - <td>必填</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>OpenAI API端点的完整URL,例如:<code>https://api.openai.com/v1/chat/completions</code> 或 - <code>https://api.openai.com/v1/embeddings</code>。</td> - </tr> - <tr> - <td> - <h5>api-key</h5> - </td> - <td>必填</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>用于认证的OpenAI API密钥。</td> - </tr> - <tr> - <td> - <h5>model</h5> - </td> - <td>必填</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>模型名称,例如:<code>gpt-3.5-turbo</code>, <code>text-embedding-ada-002</code>。</td> - </tr> - <tr> - <td> - <h5>max-context-size</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Integer</td> - <td>单个请求的最大上下文长度,单位为Token数量。当长度超过该值时,将使用context-overflow-action指定的溢出行为。</td> - </tr> - <tr> - <td> - <h5>context-overflow-action</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>处理上下文溢出的操作。支持的操作: - <ul> - <li><code>truncated-tail</code>(默认): 从上下文尾部截断超出的token。</li> - <li><code>truncated-tail-log</code>: 从上下文尾部截断超出的token。记录截断日志。</li> - <li><code>truncated-head</code>: 从上下文头部截断超出的token。</li> - <li><code>truncated-head-log</code>: 从上下文头部截断超出的token。记录截断日志。</li> - <li><code>skipped</code>: 跳过输入行。</li> - <li><code>skipped-log</code>: 跳过输入行。记录跳过日志。</li> - </ul> - </td> - </tr> - </tbody> -</table> +{{< generated/model_openai_common_section >}} ### Chat Completions -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 25%">参数</th> - <th class="text-center" style="width: 10%">是否必选</th> - <th class="text-center" style="width: 10%">默认值</th> - <th class="text-center" style="width: 10%">数据类型</th> - <th class="text-center" style="width: 45%">描述</th> - </tr> - </thead> - <tbody> - <tr> - <td> - <h5>system-prompt</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">"You are a helpful assistant."</td> - <td>String</td> - <td>用于聊天任务的系统提示信息。</td> - </tr> - <tr> - <td> - <h5>temperature</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">null</td> - <td>Double</td> - <td>控制输出的随机性,取值范围<code>[0.0, 1.0]</code>。参考<a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature">temperature</a></td> - </tr> - <tr> - <td> - <h5>top-p</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">null</td> - <td>Double</td> - <td>用于替代temperature的概率阈值。参考<a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p">top_p</a></td> - </tr> - <tr> - <td> - <h5>stop</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">null</td> - <td>String</td> - <td>停止序列,逗号分隔的列表。参考<a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-stop">stop</a></td> - </tr> - <tr> - <td> - <h5>max-tokens</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">null</td> - <td>Long</td> - <td>生成的最大token数。参考<a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens">max tokens</a></td> - </tr> - <tr> - <td> - <h5>presence-penalty</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Double</td> - <td>数值范围为-2.0到2.0之间。正值会根据新token是否出现在当前文本中对其进行惩罚,从而增加模型讨论新话题的可能性。</td> - </tr> - <tr> - <td> - <h5>n</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Long</td> - <td>为每个输入消息生成的聊天完成选项数量。请注意,您将根据所有选项生成的token数量进行收费。为最小化成本,需将n保持为1。</td> - </tr> - <tr> - <td> - <h5>seed</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Long</td> - <td>如果指定,模型平台将尽最大努力进行确定性采样,使得使用相同种子和参数的重复请求应返回相同的结果。但不保证结果一定是确定的。</td> - </tr> - <tr> - <td> - <h5>response-format</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Enum</td> - <td>响应的格式,例如 'text' 或 'json_object'。</td> - </tr> - </tbody> -</table> +{{< generated/model_openai_chat_section >}} ### Embeddings -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 25%">参数</th> - <th class="text-center" style="width: 10%">是否必选</th> - <th class="text-center" style="width: 10%">默认值</th> - <th class="text-center" style="width: 10%">数据类型</th> - <th class="text-center" style="width: 45%">描述</th> - </tr> - </thead> - <tbody> - <tr> - <td> - <h5>dimension</h5> - </td> - <td>可选</td> - <td style="word-wrap: break-word;">null</td> - <td>Long</td> - <td>embedding向量的维度。参考<a href="https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-dimensions">dimensions</a></td> - </tr> - </tbody> -</table> +{{< generated/model_openai_embedding_section >}} ## Schema要求 diff --git a/docs/content/docs/connectors/models/openai.md b/docs/content/docs/connectors/models/openai.md index 350d21387cb..58c2842c7b3 100644 --- a/docs/content/docs/connectors/models/openai.md +++ b/docs/content/docs/connectors/models/openai.md @@ -82,205 +82,15 @@ FROM ML_PREDICT( ### Common -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 25%">Option</th> - <th class="text-center" style="width: 8%">Required</th> - <th class="text-center" style="width: 7%">Default</th> - <th class="text-center" style="width: 10%">Type</th> - <th class="text-center" style="width: 50%">Description</th> - </tr> - </thead> - <tbody> - <tr> - <td> - <h5>provider</h5> - </td> - <td>required</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>Specifies the model function provider to use, must be 'openai'.</td> - </tr> - <tr> - <td> - <h5>endpoint</h5> - </td> - <td>required</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>Full URL of the OpenAI API endpoint, e.g. <code>https://api.openai.com/v1/chat/completions</code> or - <code>https://api.openai.com/v1/embeddings</code>.</td> - </tr> - <tr> - <td> - <h5>api-key</h5> - </td> - <td>required</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>OpenAI API key for authentication.</td> - </tr> - <tr> - <td> - <h5>model</h5> - </td> - <td>required</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>Model name, e.g. <code>gpt-3.5-turbo</code>, <code>text-embedding-ada-002</code>.</td> - </tr> - <tr> - <td> - <h5>max-context-size</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Integer</td> - <td>Max number of tokens for context. context-overflow-action would be triggered if this threshold is exceeded.</td> - </tr> - <tr> - <td> - <h5>context-overflow-action</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">(none)</td> - <td>String</td> - <td>Action to handle context overflows. Supported actions: - <ul> - <li><code>truncated-tail</code>(default): Truncates exceeded tokens from the tail of the context.</li> - <li><code>truncated-tail-log</code>: Truncates exceeded tokens from the tail of the context. Records the truncation log.</li> - <li><code>truncated-head</code>: Truncates exceeded tokens from the head of the context.</li> - <li><code>truncated-head-log</code>: Truncates exceeded tokens from the head of the context. Records the truncation log.</li> - <li><code>skipped</code>: Skips the input row.</li> - <li><code>skipped-log</code>: Skips the input row. Records the skipping log.</li> - </ul> - </td> - </tr> - </tbody> -</table> +{{< generated/model_openai_common_section >}} ### Chat Completions -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 25%">Option</th> - <th class="text-center" style="width: 8%">Required</th> - <th class="text-center" style="width: 7%">Default</th> - <th class="text-center" style="width: 10%">Type</th> - <th class="text-center" style="width: 50%">Description</th> - </tr> - </thead> - <tbody> - <tr> - <td> - <h5>system-prompt</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">"You are a helpful assistant."</td> - <td>String</td> - <td>The input message for the system role.</td> - </tr> - <tr> - <td> - <h5>temperature</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">null</td> - <td>Double</td> - <td>Controls randomness of output, range <code>[0.0, 1.0]</code>. See <a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature">temperature</a></td> - </tr> - <tr> - <td> - <h5>top-p</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">null</td> - <td>Double</td> - <td>Probability cutoff for token selection (used instead of temperature). See <a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p">top_p</a></td> - </tr> - <tr> - <td> - <h5>stop</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">null</td> - <td>String</td> - <td>Stop sequences, comma-separated list. See <a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-stop">stop</a></td> - </tr> - <tr> - <td> - <h5>max-tokens</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">null</td> - <td>Long</td> - <td>Maximum number of tokens to generate. See <a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens">max tokens</a></td> - </tr> - <tr> - <td> - <h5>presence-penalty</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Double</td> - <td>Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.</td> - </tr> - <tr> - <td> - <h5>n</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Long</td> - <td>How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.</td> - </tr> - <tr> - <td> - <h5>seed</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Long</td> - <td>If specified, the model platform will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed.</td> - </tr> - <tr> - <td> - <h5>response-format</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">(none)</td> - <td>Enum</td> - <td>The format of the response, e.g., 'text' or 'json_object'.</td> - </tr> - </tbody> -</table> +{{< generated/model_openai_chat_section >}} ### Embeddings -<table class="table table-bordered"> - <thead> - <tr> - <th class="text-left" style="width: 25%">Option</th> - <th class="text-center" style="width: 8%">Required</th> - <th class="text-center" style="width: 7%">Default</th> - <th class="text-center" style="width: 10%">Type</th> - <th class="text-center" style="width: 50%">Description</th> - </tr> - </thead> - <tbody> - <tr> - <td> - <h5>dimension</h5> - </td> - <td>optional</td> - <td style="word-wrap: break-word;">null</td> - <td>Long</td> - <td>Dimension of the embedding vector. See <a href="https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-dimensions">dimensions</a></td> - </tr> - </tbody> -</table> +{{< generated/model_openai_embedding_section >}} ## Schema Requirement diff --git a/docs/layouts/shortcodes/generated/model_openai_chat_section.html b/docs/layouts/shortcodes/generated/model_openai_chat_section.html new file mode 100644 index 00000000000..8d7da0f0b6e --- /dev/null +++ b/docs/layouts/shortcodes/generated/model_openai_chat_section.html @@ -0,0 +1,66 @@ +<table class="configuration table table-bordered"> + <thead> + <tr> + <th class="text-left" style="width: 20%">Key</th> + <th class="text-left" style="width: 15%">Default</th> + <th class="text-left" style="width: 10%">Type</th> + <th class="text-left" style="width: 55%">Description</th> + </tr> + </thead> + <tbody> + <tr> + <td><h5>max-tokens</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>The maximum number of tokens that can be generated in the chat completion.</td> + </tr> + <tr> + <td><h5>n</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.</td> + </tr> + <tr> + <td><h5>presence-penalty</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Double</td> + <td>Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.</td> + </tr> + <tr> + <td><h5>response-format</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td><p>Enum</p></td> + <td>The format of the response, e.g., 'text' or 'json_object'.<br /><br />Possible values:<ul><li>"text"</li><li>"json_object"</li></ul></td> + </tr> + <tr> + <td><h5>seed</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>If specified, the model platform will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed.</td> + </tr> + <tr> + <td><h5>stop</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>A CSV list of strings to pass as stop sequences to the model.</td> + </tr> + <tr> + <td><h5>system-prompt</h5></td> + <td style="word-wrap: break-word;">"You are a helpful assistant."</td> + <td>String</td> + <td>The system message of a chat.</td> + </tr> + <tr> + <td><h5>temperature</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Double</td> + <td>Controls the randomness or “creativity” of the output. Typical values are between 0.0 and 1.0.</td> + </tr> + <tr> + <td><h5>top-p</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Double</td> + <td>The probability cutoff for token selection. Usually, either temperature or topP are specified, but not both.</td> + </tr> + </tbody> +</table> diff --git a/docs/layouts/shortcodes/generated/model_openai_common_section.html b/docs/layouts/shortcodes/generated/model_openai_common_section.html new file mode 100644 index 00000000000..e003338b58e --- /dev/null +++ b/docs/layouts/shortcodes/generated/model_openai_common_section.html @@ -0,0 +1,42 @@ +<table class="configuration table table-bordered"> + <thead> + <tr> + <th class="text-left" style="width: 20%">Key</th> + <th class="text-left" style="width: 15%">Default</th> + <th class="text-left" style="width: 10%">Type</th> + <th class="text-left" style="width: 55%">Description</th> + </tr> + </thead> + <tbody> + <tr> + <td><h5>api-key</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>OpenAI API key for authentication.</td> + </tr> + <tr> + <td><h5>context-overflow-action</h5></td> + <td style="word-wrap: break-word;">truncated-tail</td> + <td><p>Enum</p></td> + <td>Action to handle context overflows.<br /><br />Possible values:<ul><li>"truncated-tail": Truncates exceeded tokens from the tail of the context.</li><li>"truncated-tail-log": Truncates exceeded tokens from the tail of the context. Records the truncation log.</li><li>"truncated-head": Truncates exceeded tokens from the head of the context.</li><li>"truncated-head-log": Truncates exceeded tokens from the head of the context. Records the truncation log.</li><li>"skipped": Sk [...] + </tr> + <tr> + <td><h5>endpoint</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>Full URL of the OpenAI API endpoint, e.g., <code class="highlighter-rouge">https://api.openai.com/v1/chat/completions</code> or <code class="highlighter-rouge">https://api.openai.com/v1/embeddings</code></td> + </tr> + <tr> + <td><h5>max-context-size</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Integer</td> + <td>Max number of tokens for context. context-overflow-action would be triggered if this threshold is exceeded.</td> + </tr> + <tr> + <td><h5>model</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>Model name, e.g., <code class="highlighter-rouge">gpt-3.5-turbo</code>, <code class="highlighter-rouge">text-embedding-ada-002</code>.</td> + </tr> + </tbody> +</table> diff --git a/docs/layouts/shortcodes/generated/model_openai_embedding_section.html b/docs/layouts/shortcodes/generated/model_openai_embedding_section.html new file mode 100644 index 00000000000..80b32fc545a --- /dev/null +++ b/docs/layouts/shortcodes/generated/model_openai_embedding_section.html @@ -0,0 +1,18 @@ +<table class="configuration table table-bordered"> + <thead> + <tr> + <th class="text-left" style="width: 20%">Key</th> + <th class="text-left" style="width: 15%">Default</th> + <th class="text-left" style="width: 10%">Type</th> + <th class="text-left" style="width: 55%">Description</th> + </tr> + </thead> + <tbody> + <tr> + <td><h5>dimension</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>The size of the embedding result array.</td> + </tr> + </tbody> +</table> diff --git a/docs/layouts/shortcodes/generated/openai_configuration.html b/docs/layouts/shortcodes/generated/openai_configuration.html new file mode 100644 index 00000000000..6cea54aa9fe --- /dev/null +++ b/docs/layouts/shortcodes/generated/openai_configuration.html @@ -0,0 +1,102 @@ +<table class="configuration table table-bordered"> + <thead> + <tr> + <th class="text-left" style="width: 20%">Key</th> + <th class="text-left" style="width: 15%">Default</th> + <th class="text-left" style="width: 10%">Type</th> + <th class="text-left" style="width: 55%">Description</th> + </tr> + </thead> + <tbody> + <tr> + <td><h5>api-key</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>OpenAI API key for authentication.</td> + </tr> + <tr> + <td><h5>context-overflow-action</h5></td> + <td style="word-wrap: break-word;">truncated-tail</td> + <td><p>Enum</p></td> + <td>Action to handle context overflows.<br /><br />Possible values:<ul><li>"truncated-tail": Truncates exceeded tokens from the tail of the context.</li><li>"truncated-tail-log": Truncates exceeded tokens from the tail of the context. Records the truncation log.</li><li>"truncated-head": Truncates exceeded tokens from the head of the context.</li><li>"truncated-head-log": Truncates exceeded tokens from the head of the context. Records the truncation log.</li><li>"skipped": Sk [...] + </tr> + <tr> + <td><h5>dimension</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>The size of the embedding result array.</td> + </tr> + <tr> + <td><h5>endpoint</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>Full URL of the OpenAI API endpoint, e.g., <code class="highlighter-rouge">https://api.openai.com/v1/chat/completions</code> or <code class="highlighter-rouge">https://api.openai.com/v1/embeddings</code></td> + </tr> + <tr> + <td><h5>max-context-size</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Integer</td> + <td>Max number of tokens for context. context-overflow-action would be triggered if this threshold is exceeded.</td> + </tr> + <tr> + <td><h5>max-tokens</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>The maximum number of tokens that can be generated in the chat completion.</td> + </tr> + <tr> + <td><h5>model</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>Model name, e.g., <code class="highlighter-rouge">gpt-3.5-turbo</code>, <code class="highlighter-rouge">text-embedding-ada-002</code>.</td> + </tr> + <tr> + <td><h5>n</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.</td> + </tr> + <tr> + <td><h5>presence-penalty</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Double</td> + <td>Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.</td> + </tr> + <tr> + <td><h5>response-format</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td><p>Enum</p></td> + <td>The format of the response, e.g., 'text' or 'json_object'.<br /><br />Possible values:<ul><li>"text"</li><li>"json_object"</li></ul></td> + </tr> + <tr> + <td><h5>seed</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Long</td> + <td>If specified, the model platform will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed.</td> + </tr> + <tr> + <td><h5>stop</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>String</td> + <td>A CSV list of strings to pass as stop sequences to the model.</td> + </tr> + <tr> + <td><h5>system-prompt</h5></td> + <td style="word-wrap: break-word;">"You are a helpful assistant."</td> + <td>String</td> + <td>The system message of a chat.</td> + </tr> + <tr> + <td><h5>temperature</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Double</td> + <td>Controls the randomness or “creativity” of the output. Typical values are between 0.0 and 1.0.</td> + </tr> + <tr> + <td><h5>top-p</h5></td> + <td style="word-wrap: break-word;">(none)</td> + <td>Double</td> + <td>The probability cutoff for token selection. Usually, either temperature or topP are specified, but not both.</td> + </tr> + </tbody> +</table> diff --git a/flink-annotations/src/main/java/org/apache/flink/annotation/docs/Documentation.java b/flink-annotations/src/main/java/org/apache/flink/annotation/docs/Documentation.java index 48ec5ce5d08..8b298eac038 100644 --- a/flink-annotations/src/main/java/org/apache/flink/annotation/docs/Documentation.java +++ b/flink-annotations/src/main/java/org/apache/flink/annotation/docs/Documentation.java @@ -115,6 +115,12 @@ public final class Documentation { public static final String CHECKPOINT_FILE_MERGING = "checkpoint_file_merging"; + public static final String MODEL_OPENAI_COMMON = "model_openai_common"; + + public static final String MODEL_OPENAI_CHAT = "model_openai_chat"; + + public static final String MODEL_OPENAI_EMBEDDING = "model_openai_embedding"; + private Sections() {} } diff --git a/flink-docs/pom.xml b/flink-docs/pom.xml index f8d2efd8d7e..2ef932852e4 100644 --- a/flink-docs/pom.xml +++ b/flink-docs/pom.xml @@ -188,6 +188,12 @@ under the License. <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.flink</groupId> + <artifactId>flink-model-openai</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-sql-gateway</artifactId> diff --git a/flink-docs/src/main/java/org/apache/flink/docs/util/ConfigurationOptionLocator.java b/flink-docs/src/main/java/org/apache/flink/docs/util/ConfigurationOptionLocator.java index d3deb0fd777..0b755272110 100644 --- a/flink-docs/src/main/java/org/apache/flink/docs/util/ConfigurationOptionLocator.java +++ b/flink-docs/src/main/java/org/apache/flink/docs/util/ConfigurationOptionLocator.java @@ -88,7 +88,9 @@ public class ConfigurationOptionLocator { "org.apache.flink.table.gateway.rest.util"), new OptionsClassLocation( "flink-external-resources/flink-external-resource-gpu", - "org.apache.flink.externalresource.gpu") + "org.apache.flink.externalresource.gpu"), + new OptionsClassLocation( + "flink-models/flink-model-openai", "org.apache.flink.model.openai") }; private static final Set<String> EXCLUSIONS = diff --git a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/AbstractOpenAIModelFunction.java b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/AbstractOpenAIModelFunction.java index cad550950b3..1d02580e7b1 100644 --- a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/AbstractOpenAIModelFunction.java +++ b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/AbstractOpenAIModelFunction.java @@ -18,10 +18,7 @@ package org.apache.flink.model.openai; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.ReadableConfig; -import org.apache.flink.configuration.description.Description; import org.apache.flink.table.api.config.ExecutionConfigOptions; import org.apache.flink.table.catalog.Column; import org.apache.flink.table.catalog.ResolvedSchema; @@ -44,59 +41,10 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import static org.apache.flink.configuration.description.TextElement.code; - /** Abstract parent class for {@link AsyncPredictFunction}s for OpenAI API. */ public abstract class AbstractOpenAIModelFunction extends AsyncPredictFunction { private static final Logger LOG = LoggerFactory.getLogger(AbstractOpenAIModelFunction.class); - public static final ConfigOption<String> ENDPOINT = - ConfigOptions.key("endpoint") - .stringType() - .noDefaultValue() - .withDescription( - Description.builder() - .text( - "Full URL of the OpenAI API endpoint, e.g., %s or %s", - code("https://api.openai.com/v1/chat/completions"), - code("https://api.openai.com/v1/embeddings")) - .build()); - - public static final ConfigOption<String> API_KEY = - ConfigOptions.key("api-key") - .stringType() - .noDefaultValue() - .withDescription("OpenAI API key for authentication."); - - public static final ConfigOption<String> MODEL = - ConfigOptions.key("model") - .stringType() - .noDefaultValue() - .withDescription( - Description.builder() - .text( - "Model name, e.g., %s, %s.", - code("gpt-3.5-turbo"), code("text-embedding-ada-002")) - .build()); - - public static final ConfigOption<Integer> MAX_CONTEXT_SIZE = - ConfigOptions.key("max-context-size") - .intType() - .noDefaultValue() - .withDescription( - "Max number of tokens for context. context-overflow-action would be triggered if this threshold is exceeded."); - - public static final ConfigOption<ContextOverflowAction> CONTEXT_OVERFLOW_ACTION = - ConfigOptions.key("context-overflow-action") - .enumType(ContextOverflowAction.class) - .defaultValue(ContextOverflowAction.TRUNCATED_TAIL) - .withDescription( - Description.builder() - .text("Action to handle context overflows. Supported actions:") - .linebreak() - .text(ContextOverflowAction.getAllValuesAndDescriptions()) - .build()); - protected transient OpenAIClientAsync client; private final int numRetry; @@ -108,9 +56,9 @@ public abstract class AbstractOpenAIModelFunction extends AsyncPredictFunction { public AbstractOpenAIModelFunction( ModelProviderFactory.Context factoryContext, ReadableConfig config) { - String endpoint = config.get(ENDPOINT); + String endpoint = config.get(OpenAIOptions.ENDPOINT); this.baseUrl = endpoint.replaceAll(String.format("/%s/*$", getEndpointSuffix()), ""); - this.apiKey = config.get(API_KEY); + this.apiKey = config.get(OpenAIOptions.API_KEY); // The model service enforces rate-limiting constraints, necessitating retry mechanisms in // most operational scenarios. Within the asynchronous operator framework, the system is // designed to process up to @@ -121,9 +69,9 @@ public abstract class AbstractOpenAIModelFunction extends AsyncPredictFunction { // resilience while maintaining throughput efficiency. this.numRetry = config.get(ExecutionConfigOptions.TABLE_EXEC_ASYNC_LOOKUP_BUFFER_CAPACITY) * 10; - this.model = config.get(MODEL); - this.maxContextSize = config.get(MAX_CONTEXT_SIZE); - this.contextOverflowAction = config.get(CONTEXT_OVERFLOW_ACTION); + this.model = config.get(OpenAIOptions.MODEL); + this.maxContextSize = config.get(OpenAIOptions.MAX_CONTEXT_SIZE); + this.contextOverflowAction = config.get(OpenAIOptions.CONTEXT_OVERFLOW_ACTION); validateSingleColumnSchema( factoryContext.getCatalogModel().getResolvedInputSchema(), diff --git a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/ContextOverflowAction.java b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/ContextOverflowAction.java index 08f5c19fcbe..0eeec880828 100644 --- a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/ContextOverflowAction.java +++ b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/ContextOverflowAction.java @@ -17,6 +17,9 @@ package org.apache.flink.model.openai; +import org.apache.flink.configuration.DescribedEnum; +import org.apache.flink.configuration.description.InlineElement; + import com.knuddels.jtokkit.Encodings; import com.knuddels.jtokkit.api.Encoding; import com.knuddels.jtokkit.api.EncodingRegistry; @@ -34,8 +37,10 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; +import static org.apache.flink.configuration.description.TextElement.text; + /** Context overflow action. */ -public enum ContextOverflowAction { +public enum ContextOverflowAction implements DescribedEnum { TRUNCATED_TAIL("truncated-tail", "Truncates exceeded tokens from the tail of the context.") { @Override public String processTokensWithLimitInternal( @@ -166,6 +171,11 @@ public enum ContextOverflowAction { abstract @Nullable String processTokensWithLimitInternal( Encoding encoding, String input, int maxContextSize, int actualNumTokens); + @Override + public InlineElement getDescription() { + return text(description); + } + @Override public String toString() { return value; diff --git a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIChatModelFunction.java b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIChatModelFunction.java index c4df81ccb88..3e96a4a2d9a 100644 --- a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIChatModelFunction.java +++ b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIChatModelFunction.java @@ -17,8 +17,6 @@ package org.apache.flink.model.openai; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.table.data.GenericRowData; @@ -46,72 +44,8 @@ public class OpenAIChatModelFunction extends AbstractOpenAIModelFunction { public static final String ENDPOINT_SUFFIX = "chat/completions"; - public static final ConfigOption<String> SYSTEM_PROMPT = - ConfigOptions.key("system-prompt") - .stringType() - .defaultValue("You are a helpful assistant.") - .withDescription("System message for chat tasks."); - - public static final ConfigOption<Double> TEMPERATURE = - ConfigOptions.key("temperature") - .doubleType() - .noDefaultValue() - .withDescription("Controls randomness of output, range [0.0, 1.0]."); - - public static final ConfigOption<Double> TOP_P = - ConfigOptions.key("top-p") - .doubleType() - .noDefaultValue() - .withDescription( - "Probability cutoff for token selection (used instead of temperature)."); - public static final String STOP_SEPARATOR = ","; - public static final ConfigOption<String> STOP = - ConfigOptions.key("stop") - .stringType() - .noDefaultValue() - .withDescription("Stop sequences, comma-separated list."); - - public static final ConfigOption<Long> MAX_TOKENS = - ConfigOptions.key("max-tokens") - .longType() - .noDefaultValue() - .withDescription("Maximum number of tokens to generate."); - - public static final ConfigOption<Double> PRESENCE_PENALTY = - ConfigOptions.key("presence-penalty") - .doubleType() - .noDefaultValue() - .withDescription( - "Number between -2.0 and 2.0." - + " Positive values penalize new tokens based on whether they appear in the text so far," - + " increasing the model's likelihood to talk about new topics."); - - public static final ConfigOption<Long> N = - ConfigOptions.key("n") - .longType() - .noDefaultValue() - .withDescription( - "How many chat completion choices to generate for each input message." - + " Note that you will be charged based on the number of generated tokens across all of the choices." - + " Keep n as 1 to minimize costs."); - - public static final ConfigOption<Long> SEED = - ConfigOptions.key("seed") - .longType() - .noDefaultValue() - .withDescription( - "If specified, the model platform will make a best effort to sample deterministically," - + " such that repeated requests with the same seed and parameters should return the same result." - + " Determinism is not guaranteed."); - - public static final ConfigOption<ChatModelResponseFormat> RESPONSE_FORMAT = - ConfigOptions.key("response-format") - .enumType(ChatModelResponseFormat.class) - .noDefaultValue() - .withDescription("The format of the response, e.g., 'text' or 'json_object'."); - private final String model; private final String systemPrompt; private final Configuration config; @@ -119,8 +53,8 @@ public class OpenAIChatModelFunction extends AbstractOpenAIModelFunction { public OpenAIChatModelFunction( ModelProviderFactory.Context factoryContext, ReadableConfig config) { super(factoryContext, config); - model = config.get(MODEL); - systemPrompt = config.get(SYSTEM_PROMPT); + model = config.get(OpenAIOptions.MODEL); + systemPrompt = config.get(OpenAIOptions.SYSTEM_PROMPT); this.config = Configuration.fromMap(config.toMap()); validateSingleColumnSchema( factoryContext.getCatalogModel().getResolvedOutputSchema(), @@ -140,17 +74,17 @@ public class OpenAIChatModelFunction extends AbstractOpenAIModelFunction { .addSystemMessage(systemPrompt) .addUserMessage(input) .model(model); - this.config.getOptional(TEMPERATURE).ifPresent(builder::temperature); - this.config.getOptional(TOP_P).ifPresent(builder::topP); + this.config.getOptional(OpenAIOptions.TEMPERATURE).ifPresent(builder::temperature); + this.config.getOptional(OpenAIOptions.TOP_P).ifPresent(builder::topP); this.config - .getOptional(STOP) + .getOptional(OpenAIOptions.STOP) .ifPresent(x -> builder.stopOfStrings(Arrays.asList(x.split(STOP_SEPARATOR)))); - this.config.getOptional(MAX_TOKENS).ifPresent(builder::maxTokens); - this.config.getOptional(PRESENCE_PENALTY).ifPresent(builder::presencePenalty); - this.config.getOptional(N).ifPresent(builder::n); - this.config.getOptional(SEED).ifPresent(builder::seed); + this.config.getOptional(OpenAIOptions.MAX_TOKENS).ifPresent(builder::maxTokens); + this.config.getOptional(OpenAIOptions.PRESENCE_PENALTY).ifPresent(builder::presencePenalty); + this.config.getOptional(OpenAIOptions.N).ifPresent(builder::n); + this.config.getOptional(OpenAIOptions.SEED).ifPresent(builder::seed); this.config - .getOptional(RESPONSE_FORMAT) + .getOptional(OpenAIOptions.RESPONSE_FORMAT) .ifPresent(x -> builder.responseFormat(x.getResponseFormat())); return client.chat() diff --git a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIEmbeddingModelFunction.java b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIEmbeddingModelFunction.java index a169d4f0856..854ca2cf5d0 100644 --- a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIEmbeddingModelFunction.java +++ b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIEmbeddingModelFunction.java @@ -17,8 +17,6 @@ package org.apache.flink.model.openai; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.table.data.GenericArrayData; import org.apache.flink.table.data.GenericRowData; @@ -45,20 +43,14 @@ public class OpenAIEmbeddingModelFunction extends AbstractOpenAIModelFunction { public static final String ENDPOINT_SUFFIX = "embeddings"; - public static final ConfigOption<Long> DIMENSION = - ConfigOptions.key("dimension") - .longType() - .noDefaultValue() - .withDescription("Dimension of the embedding vector."); - private final String model; @Nullable private final Long dimensions; public OpenAIEmbeddingModelFunction( ModelProviderFactory.Context factoryContext, ReadableConfig config) { super(factoryContext, config); - model = config.get(MODEL); - dimensions = config.get(DIMENSION); + model = config.get(OpenAIOptions.MODEL); + dimensions = config.get(OpenAIOptions.DIMENSION); validateSingleColumnSchema( factoryContext.getCatalogModel().getResolvedOutputSchema(), diff --git a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIModelProviderFactory.java b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIModelProviderFactory.java index 6f573639ae2..4c5b340528a 100644 --- a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIModelProviderFactory.java +++ b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIModelProviderFactory.java @@ -36,7 +36,7 @@ public class OpenAIModelProviderFactory implements ModelProviderFactory { FactoryUtil.ModelProviderFactoryHelper helper = FactoryUtil.createModelProviderFactoryHelper(this, context); helper.validate(); - String endpoint = helper.getOptions().get(AbstractOpenAIModelFunction.ENDPOINT); + String endpoint = helper.getOptions().get(OpenAIOptions.ENDPOINT); endpoint = endpoint.replaceAll("/*$", "").toLowerCase(); AsyncPredictFunction function; @@ -58,27 +58,27 @@ public class OpenAIModelProviderFactory implements ModelProviderFactory { @Override public Set<ConfigOption<?>> requiredOptions() { Set<ConfigOption<?>> set = new HashSet<>(); - set.add(AbstractOpenAIModelFunction.ENDPOINT); - set.add(AbstractOpenAIModelFunction.API_KEY); - set.add(AbstractOpenAIModelFunction.MODEL); + set.add(OpenAIOptions.ENDPOINT); + set.add(OpenAIOptions.API_KEY); + set.add(OpenAIOptions.MODEL); return set; } @Override public Set<ConfigOption<?>> optionalOptions() { Set<ConfigOption<?>> set = new HashSet<>(); - set.add(AbstractOpenAIModelFunction.MAX_CONTEXT_SIZE); - set.add(AbstractOpenAIModelFunction.CONTEXT_OVERFLOW_ACTION); - set.add(OpenAIChatModelFunction.SYSTEM_PROMPT); - set.add(OpenAIChatModelFunction.TEMPERATURE); - set.add(OpenAIChatModelFunction.TOP_P); - set.add(OpenAIChatModelFunction.STOP); - set.add(OpenAIChatModelFunction.MAX_TOKENS); - set.add(OpenAIChatModelFunction.PRESENCE_PENALTY); - set.add(OpenAIChatModelFunction.N); - set.add(OpenAIChatModelFunction.SEED); - set.add(OpenAIChatModelFunction.RESPONSE_FORMAT); - set.add(OpenAIEmbeddingModelFunction.DIMENSION); + set.add(OpenAIOptions.MAX_CONTEXT_SIZE); + set.add(OpenAIOptions.CONTEXT_OVERFLOW_ACTION); + set.add(OpenAIOptions.SYSTEM_PROMPT); + set.add(OpenAIOptions.TEMPERATURE); + set.add(OpenAIOptions.TOP_P); + set.add(OpenAIOptions.STOP); + set.add(OpenAIOptions.MAX_TOKENS); + set.add(OpenAIOptions.PRESENCE_PENALTY); + set.add(OpenAIOptions.N); + set.add(OpenAIOptions.SEED); + set.add(OpenAIOptions.RESPONSE_FORMAT); + set.add(OpenAIOptions.DIMENSION); return set; } diff --git a/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIOptions.java b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIOptions.java new file mode 100644 index 00000000000..8e605be51e0 --- /dev/null +++ b/flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/OpenAIOptions.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.model.openai; + +import org.apache.flink.annotation.Experimental; +import org.apache.flink.annotation.docs.Documentation; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.description.Description; + +import static org.apache.flink.configuration.description.TextElement.code; + +/** Options for OpenAI API Model Functions. */ +@Experimental +public class OpenAIOptions { + + // ------------------------------------------------------------------------ + // Common Options + // ------------------------------------------------------------------------ + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_COMMON}) + public static final ConfigOption<String> ENDPOINT = + ConfigOptions.key("endpoint") + .stringType() + .noDefaultValue() + .withDescription( + Description.builder() + .text( + "Full URL of the OpenAI API endpoint, e.g., %s or %s", + code("https://api.openai.com/v1/chat/completions"), + code("https://api.openai.com/v1/embeddings")) + .build()); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_COMMON}) + public static final ConfigOption<String> API_KEY = + ConfigOptions.key("api-key") + .stringType() + .noDefaultValue() + .withDescription("OpenAI API key for authentication."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_COMMON}) + public static final ConfigOption<String> MODEL = + ConfigOptions.key("model") + .stringType() + .noDefaultValue() + .withDescription( + Description.builder() + .text( + "Model name, e.g., %s, %s.", + code("gpt-3.5-turbo"), code("text-embedding-ada-002")) + .build()); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_COMMON}) + public static final ConfigOption<Integer> MAX_CONTEXT_SIZE = + ConfigOptions.key("max-context-size") + .intType() + .noDefaultValue() + .withDescription( + "Max number of tokens for context. context-overflow-action would be triggered if this threshold is exceeded."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_COMMON}) + public static final ConfigOption<ContextOverflowAction> CONTEXT_OVERFLOW_ACTION = + ConfigOptions.key("context-overflow-action") + .enumType(ContextOverflowAction.class) + .defaultValue(ContextOverflowAction.TRUNCATED_TAIL) + .withDescription( + Description.builder() + .text("Action to handle context overflows.") + .build()); + + // ------------------------------------------------------------------------ + // Options for Chat Completion Model Functions + // ------------------------------------------------------------------------ + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<String> SYSTEM_PROMPT = + ConfigOptions.key("system-prompt") + .stringType() + .defaultValue("You are a helpful assistant.") + .withDeprecatedKeys("systemPrompt") + .withDescription("The system message of a chat."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<Double> TEMPERATURE = + ConfigOptions.key("temperature") + .doubleType() + .noDefaultValue() + .withDescription( + "Controls the randomness or “creativity” of the output. Typical values are between 0.0 and 1.0."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<Double> TOP_P = + ConfigOptions.key("top-p") + .doubleType() + .noDefaultValue() + .withDeprecatedKeys("topP") + .withDescription( + "The probability cutoff for token selection. Usually, either temperature or topP are specified, but not both."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<String> STOP = + ConfigOptions.key("stop") + .stringType() + .noDefaultValue() + .withDescription( + "A CSV list of strings to pass as stop sequences to the model."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<Long> MAX_TOKENS = + ConfigOptions.key("max-tokens") + .longType() + .noDefaultValue() + .withDeprecatedKeys("maxTokens") + .withDescription( + "The maximum number of tokens that can be generated in the chat completion."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<Double> PRESENCE_PENALTY = + ConfigOptions.key("presence-penalty") + .doubleType() + .noDefaultValue() + .withDescription( + "Number between -2.0 and 2.0." + + " Positive values penalize new tokens based on whether they appear in the text so far," + + " increasing the model's likelihood to talk about new topics."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<Long> N = + ConfigOptions.key("n") + .longType() + .noDefaultValue() + .withDescription( + "How many chat completion choices to generate for each input message." + + " Note that you will be charged based on the number of generated tokens across all of the choices." + + " Keep n as 1 to minimize costs."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<Long> SEED = + ConfigOptions.key("seed") + .longType() + .noDefaultValue() + .withDescription( + "If specified, the model platform will make a best effort to sample deterministically," + + " such that repeated requests with the same seed and parameters should return the same result." + + " Determinism is not guaranteed."); + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_CHAT}) + public static final ConfigOption<OpenAIChatModelFunction.ChatModelResponseFormat> + RESPONSE_FORMAT = + ConfigOptions.key("response-format") + .enumType(OpenAIChatModelFunction.ChatModelResponseFormat.class) + .noDefaultValue() + .withDescription( + "The format of the response, e.g., 'text' or 'json_object'."); + + // ------------------------------------------------------------------------ + // Options for Embedding Model Functions + // ------------------------------------------------------------------------ + + @Documentation.Section({Documentation.Sections.MODEL_OPENAI_EMBEDDING}) + public static final ConfigOption<Long> DIMENSION = + ConfigOptions.key("dimension") + .longType() + .noDefaultValue() + .withDescription("The size of the embedding result array."); +}
