feat: add custom prompt config in the request and return prompt in th… · googleapis/googleapis@1a14153
@@ -779,19 +779,20 @@ message ExplicitDecodingConfig {
779779780780// Configuration to enable speaker diarization.
781781message SpeakerDiarizationConfig {
782-// Required. Minimum number of speakers in the conversation. This range gives
783-// you more flexibility by allowing the system to automatically determine the
784-// correct number of speakers.
785-//
786-// To fix the number of speakers detected in the audio, set
787-// `min_speaker_count` = `max_speaker_count`.
788-int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];
789-790-// Required. Maximum number of speakers in the conversation. Valid values are:
791-// 1-6. Must be >= `min_speaker_count`. This range gives you more flexibility
792-// by allowing the system to automatically determine the correct number of
793-// speakers.
794-int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
782+// Optional. The system automatically determines the number of speakers. This
783+// value is not currently used.
784+int32 min_speaker_count = 2 [(google.api.field_behavior) = OPTIONAL];
785+786+// Optional. The system automatically determines the number of speakers. This
787+// value is not currently used.
788+int32 max_speaker_count = 3 [(google.api.field_behavior) = OPTIONAL];
789+}
790+791+// Configuration to enable a custom prompt in chirp3.
792+message CustomPromptConfig {
793+// Optional. The custom instructions to override the existing instructions for
794+// chirp3.
795+string custom_prompt = 1 [(google.api.field_behavior) = OPTIONAL];
795796}
796797797798// Available recognition features.
@@ -846,21 +847,19 @@ message RecognitionFeatures {
846847// Mode for recognizing multi-channel audio.
847848MultiChannelMode multi_channel_mode = 17;
848849849-// Configuration to enable speaker diarization and set additional
850-// parameters to make diarization better suited for your application.
851-// When this is enabled, we send all the words from the beginning of the
852-// audio for the top alternative in every consecutive STREAMING responses.
853-// This is done in order to improve our speaker tags as our models learn to
854-// identify the speakers in the conversation over time.
855-// For non-streaming requests, the diarization results will be provided only
856-// in the top alternative of the FINAL SpeechRecognitionResult.
850+// Configuration to enable speaker diarization. To enable diarization, set
851+// this field to an empty SpeakerDiarizationConfig message.
857852SpeakerDiarizationConfig diarization_config = 9;
858853859854// Maximum number of recognition hypotheses to be returned.
860855// The server may return fewer than `max_alternatives`.
861856// Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
862857// one. If omitted, will return a maximum of one.
863858int32 max_alternatives = 16;
859+860+// Optional. Configuration to enable a custom prompt for chirp3.
861+CustomPromptConfig custom_prompt_config = 18
862+ [(google.api.field_behavior) = OPTIONAL];
864863}
865864866865// Transcription normalization configuration. Use transcription normalization
@@ -1066,6 +1065,13 @@ message RecognitionResponseMetadata {
1066106510671066// When available, billed audio seconds for the corresponding request.
10681067google.protobuf.Duration total_billed_duration = 6;
1068+1069+// Optional. Output only. Provides the prompt used for the recognition
1070+// request.
1071+optional string prompt = 10 [
1072+(google.api.field_behavior) = OUTPUT_ONLY,
1073+(google.api.field_behavior) = OPTIONAL
1074+ ];
10691075}
1070107610711077// Alternative hypotheses (a.k.a. n-best list).