diff --git a/src/api/client/requests/ChatRequest.ts b/src/api/client/requests/ChatRequest.ts
index 31f09840..c9fb69a0 100644
--- a/src/api/client/requests/ChatRequest.ts
+++ b/src/api/client/requests/ChatRequest.ts
@@ -240,6 +240,13 @@ export interface ChatRequest {
      * Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
      */
     presencePenalty?: number;
+    /**
+     * When enabled, the user's prompt will be sent to the model without
+     * any pre-processing.
+     *
+     * Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+     */
+    rawPrompting?: boolean;
     /**
      * A list of available tools (functions) that the model may suggest invoking before producing a text response.
      *
diff --git a/src/api/client/requests/ChatStreamRequest.ts b/src/api/client/requests/ChatStreamRequest.ts
index a0bdd026..16905709 100644
--- a/src/api/client/requests/ChatStreamRequest.ts
+++ b/src/api/client/requests/ChatStreamRequest.ts
@@ -186,6 +186,13 @@ export interface ChatStreamRequest {
      * Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
      */
     presencePenalty?: number;
+    /**
+     * When enabled, the user's prompt will be sent to the model without
+     * any pre-processing.
+     *
+     * Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+     */
+    rawPrompting?: boolean;
     /**
      * A list of available tools (functions) that the model may suggest invoking before producing a text response.
      *
diff --git a/src/api/resources/v2/client/requests/V2ChatRequest.ts b/src/api/resources/v2/client/requests/V2ChatRequest.ts
index 2e31285f..b389bdc2 100644
--- a/src/api/resources/v2/client/requests/V2ChatRequest.ts
+++ b/src/api/resources/v2/client/requests/V2ChatRequest.ts
@@ -127,9 +127,11 @@ export interface V2ChatRequest {
      */
     safetyMode?: Cohere.V2ChatRequestSafetyMode;
     /**
-     * The maximum number of tokens the model will generate as part of the response.
+     * The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).
      *
-     * **Note**: Setting a low value may result in incomplete generations.
+     * **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `"MAX_TOKENS"`.
+     *
+     * **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.
      */
     maxTokens?: number;
     /** A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. */
@@ -182,4 +184,11 @@ export interface V2ChatRequest {
      */
     toolChoice?: Cohere.V2ChatRequestToolChoice;
     thinking?: Cohere.Thinking;
+    /**
+     * When enabled, the user's prompt will be sent to the model without
+     * any pre-processing.
+     *
+     * Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+     */
+    rawPrompting?: boolean;
 }
diff --git a/src/api/resources/v2/client/requests/V2ChatStreamRequest.ts b/src/api/resources/v2/client/requests/V2ChatStreamRequest.ts
index c4b551e8..6a9cbf07 100644
--- a/src/api/resources/v2/client/requests/V2ChatStreamRequest.ts
+++ b/src/api/resources/v2/client/requests/V2ChatStreamRequest.ts
@@ -114,9 +114,11 @@ export interface V2ChatStreamRequest {
      */
     safetyMode?: Cohere.V2ChatStreamRequestSafetyMode;
     /**
-     * The maximum number of tokens the model will generate as part of the response.
+     * The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).
      *
-     * **Note**: Setting a low value may result in incomplete generations.
+     * **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `"MAX_TOKENS"`.
+     *
+     * **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.
      */
     maxTokens?: number;
     /** A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence. */
@@ -169,4 +171,11 @@ export interface V2ChatStreamRequest {
      */
     toolChoice?: Cohere.V2ChatStreamRequestToolChoice;
     thinking?: Cohere.Thinking;
+    /**
+     * When enabled, the user's prompt will be sent to the model without
+     * any pre-processing.
+     *
+     * Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
+     */
+    rawPrompting?: boolean;
 }
diff --git a/src/api/types/ChatMessageEndEventDelta.ts b/src/api/types/ChatMessageEndEventDelta.ts
index 295f94b6..1f6d907f 100644
--- a/src/api/types/ChatMessageEndEventDelta.ts
+++ b/src/api/types/ChatMessageEndEventDelta.ts
@@ -2,7 +2,11 @@
  * This file was auto-generated by Fern from our API Definition.
  */
 
+import * as Cohere from "../index";
+
 export interface ChatMessageEndEventDelta {
     /** An error message if an error occurred during the generation. */
     error?: string;
+    finishReason?: Cohere.ChatFinishReason;
+    usage?: Cohere.Usage;
 }
diff --git a/src/serialization/client/requests/ChatRequest.ts b/src/serialization/client/requests/ChatRequest.ts
index 18768e22..9cbb18b4 100644
--- a/src/serialization/client/requests/ChatRequest.ts
+++ b/src/serialization/client/requests/ChatRequest.ts
@@ -41,6 +41,7 @@ export const ChatRequest: core.serialization.Schema<
     ),
     frequencyPenalty: core.serialization.property("frequency_penalty", core.serialization.number().optional()),
     presencePenalty: core.serialization.property("presence_penalty", core.serialization.number().optional()),
+    rawPrompting: core.serialization.property("raw_prompting", core.serialization.boolean().optional()),
     tools: core.serialization.list(Tool).optional(),
     toolResults: core.serialization.property("tool_results", core.serialization.list(ToolResult).optional()),
     forceSingleStep: core.serialization.property("force_single_step", core.serialization.boolean().optional()),
@@ -69,6 +70,7 @@ export declare namespace ChatRequest {
         stop_sequences?: string[] | null;
         frequency_penalty?: number | null;
         presence_penalty?: number | null;
+        raw_prompting?: boolean | null;
         tools?: Tool.Raw[] | null;
         tool_results?: ToolResult.Raw[] | null;
         force_single_step?: boolean | null;
diff --git a/src/serialization/client/requests/ChatStreamRequest.ts b/src/serialization/client/requests/ChatStreamRequest.ts
index bc60cc92..0cb24cbb 100644
--- a/src/serialization/client/requests/ChatStreamRequest.ts
+++ b/src/serialization/client/requests/ChatStreamRequest.ts
@@ -41,6 +41,7 @@ export const ChatStreamRequest: core.serialization.Schema<
     ),
     frequencyPenalty: core.serialization.property("frequency_penalty", core.serialization.number().optional()),
     presencePenalty: core.serialization.property("presence_penalty", core.serialization.number().optional()),
+    rawPrompting: core.serialization.property("raw_prompting", core.serialization.boolean().optional()),
     tools: core.serialization.list(Tool).optional(),
     toolResults: core.serialization.property("tool_results", core.serialization.list(ToolResult).optional()),
     forceSingleStep: core.serialization.property("force_single_step", core.serialization.boolean().optional()),
@@ -69,6 +70,7 @@ export declare namespace ChatStreamRequest {
         stop_sequences?: string[] | null;
         frequency_penalty?: number | null;
         presence_penalty?: number | null;
+        raw_prompting?: boolean | null;
         tools?: Tool.Raw[] | null;
         tool_results?: ToolResult.Raw[] | null;
         force_single_step?: boolean | null;
diff --git a/src/serialization/resources/v2/client/requests/V2ChatRequest.ts b/src/serialization/resources/v2/client/requests/V2ChatRequest.ts
index ab703faf..3af3b907 100644
--- a/src/serialization/resources/v2/client/requests/V2ChatRequest.ts
+++ b/src/serialization/resources/v2/client/requests/V2ChatRequest.ts
@@ -39,6 +39,7 @@ export const V2ChatRequest: core.serialization.Schema<
+    rawPrompting: core.serialization.property("raw_prompting", core.serialization.boolean().optional()),
diff --git a/src/serialization/types/ChatMessageEndEventDelta.ts b/src/serialization/types/ChatMessageEndEventDelta.ts
--- a/src/serialization/types/ChatMessageEndEventDelta.ts
+++ b/src/serialization/types/ChatMessageEndEventDelta.ts
 > = core.serialization.object({
     error: core.serialization.string().optional(),
+    finishReason: core.serialization.property("finish_reason", ChatFinishReason.optional()),
+    usage: Usage.optional(),
 });
 
 export declare namespace ChatMessageEndEventDelta {
     export interface Raw {
         error?: string | null;
+        finish_reason?: ChatFinishReason.Raw | null;
+        usage?: Usage.Raw | null;
     }
 }
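
For reference, a minimal sketch of how the new `rawPrompting` flag would be passed through the v2 client, together with the `"MAX_TOKENS"` finish reason described in the updated `maxTokens` docs. The client setup, model name, and prompt are illustrative assumptions, not part of this diff:

```ts
import { CohereClientV2 } from "cohere-ai";

// Assumed setup: an API key supplied via the CO_API_KEY environment variable.
const cohere = new CohereClientV2({ token: process.env.CO_API_KEY! });

async function main(): Promise<void> {
    const response = await cohere.chat({
        model: "command-r-plus", // illustrative model name
        messages: [{ role: "user", content: "Write one line about version control." }],
        // New field from this diff: send the prompt to the model without any pre-processing.
        rawPrompting: true,
        maxTokens: 64,
    });

    // Per the updated maxTokens docs, a truncated generation is reported
    // with finish_reason set to "MAX_TOKENS".
    if (response.finishReason === "MAX_TOKENS") {
        console.warn("Generation was cut off by maxTokens.");
    }

    const first = response.message?.content?.[0];
    if (first?.type === "text") {
        console.log(first.text);
    }
}

main();
```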
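And a sketch of consuming the enriched `ChatMessageEndEventDelta` on the streaming path; only `event.delta.finishReason` and `event.delta.usage` come from this diff, the surrounding boilerplate is assumed:

```ts
import { CohereClientV2 } from "cohere-ai";

const cohere = new CohereClientV2({ token: process.env.CO_API_KEY! });

async function main(): Promise<void> {
    const stream = await cohere.chatStream({
        model: "command-r-plus", // illustrative model name
        messages: [{ role: "user", content: "Summarize raw prompting in one sentence." }],
        maxTokens: 32, // deliberately low, to exercise the MAX_TOKENS finish reason
    });

    for await (const event of stream) {
        if (event.type === "content-delta") {
            process.stdout.write(event.delta?.message?.content?.text ?? "");
        } else if (event.type === "message-end") {
            // New fields from this diff: the end-of-message delta now carries
            // the finish reason and token usage alongside any error message.
            console.log("\nfinish reason:", event.delta?.finishReason);
            console.log("billed units:", event.delta?.usage?.billedUnits);
        }
    }
}

main();
```

A likely motivation for surfacing these fields on the end event: streaming callers previously had no way to tell why a generation stopped, or what it cost, without issuing a separate non-streaming request.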