@@ -23,6 +23,7 @@ use futures::StreamExt;
2323use http_body_util:: BodyExt ;
2424use hyper:: { HeaderMap , StatusCode } ;
2525use serde:: { Deserialize , Serialize } ;
26+ use serde_json:: { Map , Value } ;
2627use tokio:: sync:: mpsc;
2728
2829use super :: {
@@ -32,7 +33,7 @@ use super::{
3233use crate :: {
3334 config:: ServiceConfig ,
3435 health:: HealthCheckResult ,
35- models:: { DetectionWarningReason , DetectorParams } ,
36+ models:: { DetectionWarningReason , DetectorParams , ValidationError } ,
3637 orchestrator,
3738} ;
3839
@@ -167,122 +168,83 @@ impl From<ChatCompletion> for ChatCompletionsResponse {
167168 }
168169}
169170
170- #[ derive( Debug , Default , Clone , Serialize , Deserialize ) ]
171- #[ serde( deny_unknown_fields) ]
171+ /// Represents a chat completions request.
172+ ///
173+ /// As orchestrator is only concerned with a limited subset
174+ /// of request fields, we deserialize to an inner [`serde_json::Map`]
175+ /// and only validate and extract the fields used by this service.
176+ /// This type is then serialized to the inner [`serde_json::Map`].
177+ ///
178+ /// This is to avoid tracking and updating OpenAI and vLLM
179+ /// parameter additions/changes. Full validation is delegated to
180+ /// the downstream server implementation.
181+ ///
182+ /// Validated fields: detectors (internal), model, messages
183+ #[ derive( Debug , Default , Clone , PartialEq , Deserialize ) ]
184+ #[ serde( try_from = "Map<String, Value>" ) ]
172185pub struct ChatCompletionsRequest {
173- /// A list of messages comprising the conversation so far.
174- pub messages : Vec < Message > ,
175- /// ID of the model to use.
176- pub model : String ,
177- /// Whether or not to store the output of this chat completion request.
178- #[ serde( skip_serializing_if = "Option::is_none" ) ]
179- pub store : Option < bool > ,
180- /// Developer-defined tags and values.
181- #[ serde( skip_serializing_if = "Option::is_none" ) ]
182- pub metadata : Option < serde_json:: Value > ,
183- #[ serde( skip_serializing_if = "Option::is_none" ) ]
184- pub frequency_penalty : Option < f32 > ,
185- /// Modify the likelihood of specified tokens appearing in the completion.
186- #[ serde( skip_serializing_if = "Option::is_none" ) ]
187- pub logit_bias : Option < HashMap < String , f32 > > ,
188- /// Whether to return log probabilities of the output tokens or not.
189- /// If true, returns the log probabilities of each output token returned in the content of message.
190- #[ serde( skip_serializing_if = "Option::is_none" ) ]
191- pub logprobs : Option < bool > ,
192- /// An integer between 0 and 20 specifying the number of most likely tokens to return
193- /// at each token position, each with an associated log probability.
194- /// logprobs must be set to true if this parameter is used.
195- #[ serde( skip_serializing_if = "Option::is_none" ) ]
196- pub top_logprobs : Option < u32 > ,
197- /// The maximum number of tokens that can be generated in the chat completion. (DEPRECATED)
198- #[ serde( skip_serializing_if = "Option::is_none" ) ]
199- pub max_tokens : Option < u32 > ,
200- /// An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
201- #[ serde( skip_serializing_if = "Option::is_none" ) ]
202- pub max_completion_tokens : Option < u32 > ,
203- /// How many chat completion choices to generate for each input message.
204- #[ serde( skip_serializing_if = "Option::is_none" ) ]
205- pub n : Option < u32 > ,
206- /// Positive values penalize new tokens based on whether they appear in the text so far,
207- /// increasing the model's likelihood to talk about new topics.
208- #[ serde( skip_serializing_if = "Option::is_none" ) ]
209- pub presence_penalty : Option < f32 > ,
210- /// An object specifying the format that the model must output.
211- #[ serde( skip_serializing_if = "Option::is_none" ) ]
212- pub response_format : Option < ResponseFormat > ,
213- /// If specified, our system will make a best effort to sample deterministically,
214- /// such that repeated requests with the same seed and parameters should return the same result.
215- #[ serde( skip_serializing_if = "Option::is_none" ) ]
216- pub seed : Option < u64 > ,
217- /// Specifies the latency tier to use for processing the request.
218- #[ serde( skip_serializing_if = "Option::is_none" ) ]
219- pub service_tier : Option < String > ,
220- /// Up to 4 sequences where the API will stop generating further tokens.
221- #[ serde( skip_serializing_if = "Option::is_none" ) ]
222- pub stop : Option < StopTokens > ,
223- /// If set, partial message deltas will be sent, like in ChatGPT.
224- /// Tokens will be sent as data-only server-sent events as they become available,
225- /// with the stream terminated by a data: [DONE] message.
226- #[ serde( default ) ]
186+ /// Detector config.
187+ pub detectors : DetectorConfig ,
188+ /// Stream parameter.
227189 pub stream : bool ,
228- /// Options for streaming response. Only set this when you set stream: true.
229- #[ serde( skip_serializing_if = "Option::is_none" ) ]
230- pub stream_options : Option < StreamOptions > ,
231- /// What sampling temperature to use, between 0 and 2.
232- /// Higher values like 0.8 will make the output more random,
233- /// while lower values like 0.2 will make it more focused and deterministic.
234- #[ serde( skip_serializing_if = "Option::is_none" ) ]
235- pub temperature : Option < f32 > ,
236- /// An alternative to sampling with temperature, called nucleus sampling,
237- /// where the model considers the results of the tokens with top_p probability mass.
238- /// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
239- #[ serde( skip_serializing_if = "Option::is_none" ) ]
240- pub top_p : Option < f32 > ,
241- /// A list of tools the model may call.
242- #[ serde( default , skip_serializing_if = "Vec::is_empty" ) ]
243- pub tools : Vec < Tool > ,
244- /// Controls which (if any) tool is called by the model.
245- #[ serde( skip_serializing_if = "Option::is_none" ) ]
246- pub tool_choice : Option < ToolChoice > ,
247- /// Whether to enable parallel function calling during tool use.
248- #[ serde( skip_serializing_if = "Option::is_none" ) ]
249- pub parallel_tool_calls : Option < bool > ,
250- /// A unique identifier representing your end-user.
251- #[ serde( skip_serializing_if = "Option::is_none" ) ]
252- pub user : Option < String > ,
190+ /// Model name.
191+ pub model : String ,
192+ /// Messages.
193+ pub messages : Vec < Message > ,
194+ /// Inner request.
195+ pub inner : Map < String , Value > ,
196+ }
253197
254- // Additional vllm params
255- #[ serde( skip_serializing_if = "Option::is_none" ) ]
256- pub best_of : Option < usize > ,
257- #[ serde( skip_serializing_if = "Option::is_none" ) ]
258- pub use_beam_search : Option < bool > ,
259- #[ serde( skip_serializing_if = "Option::is_none" ) ]
260- pub top_k : Option < isize > ,
261- #[ serde( skip_serializing_if = "Option::is_none" ) ]
262- pub min_p : Option < f32 > ,
263- #[ serde( skip_serializing_if = "Option::is_none" ) ]
264- pub repetition_penalty : Option < f32 > ,
265- #[ serde( skip_serializing_if = "Option::is_none" ) ]
266- pub length_penalty : Option < f32 > ,
267- #[ serde( skip_serializing_if = "Option::is_none" ) ]
268- pub early_stopping : Option < bool > ,
269- #[ serde( skip_serializing_if = "Option::is_none" ) ]
270- pub ignore_eos : Option < bool > ,
271- #[ serde( skip_serializing_if = "Option::is_none" ) ]
272- pub min_tokens : Option < u32 > ,
273- #[ serde( skip_serializing_if = "Option::is_none" ) ]
274- pub stop_token_ids : Option < Vec < usize > > ,
275- #[ serde( skip_serializing_if = "Option::is_none" ) ]
276- pub skip_special_tokens : Option < bool > ,
277- #[ serde( skip_serializing_if = "Option::is_none" ) ]
278- pub spaces_between_special_tokens : Option < bool > ,
198+ impl TryFrom < Map < String , Value > > for ChatCompletionsRequest {
199+ type Error = ValidationError ;
279200
280- // Detectors
281- // Note: We are making it optional, since this structure also gets used to
282- // form request for chat completions. And downstream server, might choose to
283- // reject extra parameters.
284- #[ serde( skip_serializing_if = "Option::is_none" ) ]
285- pub detectors : Option < DetectorConfig > ,
201+ fn try_from ( mut value : Map < String , Value > ) -> Result < Self , Self :: Error > {
202+ let detectors = if let Some ( detectors) = value. remove ( "detectors" ) {
203+ DetectorConfig :: deserialize ( detectors)
204+ . map_err ( |_| ValidationError :: Invalid ( "error deserializing `detectors`" . into ( ) ) ) ?
205+ } else {
206+ DetectorConfig :: default ( )
207+ } ;
208+ let stream = value
209+ . get ( "stream" )
210+ . and_then ( |v| v. as_bool ( ) )
211+ . unwrap_or_default ( ) ;
212+ let model = if let Some ( Value :: String ( model) ) = value. get ( "model" ) {
213+ Ok ( model. clone ( ) )
214+ } else {
215+ Err ( ValidationError :: Required ( "model" . into ( ) ) )
216+ } ?;
217+ if model. is_empty ( ) {
218+ return Err ( ValidationError :: Invalid ( "`model` must not be empty" . into ( ) ) ) ;
219+ }
220+ let messages = if let Some ( messages) = value. get ( "messages" ) {
221+ Vec :: < Message > :: deserialize ( messages)
222+ . map_err ( |_| ValidationError :: Invalid ( "error deserializing `messages`" . into ( ) ) )
223+ } else {
224+ Err ( ValidationError :: Required ( "messages" . into ( ) ) )
225+ } ?;
226+ if messages. is_empty ( ) {
227+ return Err ( ValidationError :: Invalid (
228+ "`messages` must not be empty" . into ( ) ,
229+ ) ) ;
230+ }
231+ Ok ( ChatCompletionsRequest {
232+ detectors,
233+ stream,
234+ model,
235+ messages,
236+ inner : value,
237+ } )
238+ }
239+ }
240+
241+ impl Serialize for ChatCompletionsRequest {
242+ fn serialize < S > ( & self , serializer : S ) -> Result < S :: Ok , S :: Error >
243+ where
244+ S : serde:: Serializer ,
245+ {
246+ self . inner . serialize ( serializer)
247+ }
286248}
287249
288250/// Structure to contain parameters for detectors.
@@ -291,7 +253,6 @@ pub struct ChatCompletionsRequest {
291253pub struct DetectorConfig {
292254 #[ serde( default , skip_serializing_if = "HashMap::is_empty" ) ]
293255 pub input : HashMap < String , DetectorParams > ,
294-
295256 #[ serde( default , skip_serializing_if = "HashMap::is_empty" ) ]
296257 pub output : HashMap < String , DetectorParams > ,
297258}
@@ -369,7 +330,7 @@ pub enum Role {
369330 Tool ,
370331}
371332
372- #[ derive( Debug , Default , Clone , Serialize , Deserialize ) ]
333+ #[ derive( Debug , Default , Clone , PartialEq , Serialize , Deserialize ) ]
373334#[ serde( deny_unknown_fields) ]
374335pub struct Message {
375336 /// The role of the author of this message.
@@ -731,3 +692,103 @@ impl OrchestratorWarning {
731692 }
732693 }
733694}
695+
#[cfg(test)]
mod test {
    use serde_json::json;

    use super::*;

    #[test]
    fn test_chat_completions_request() -> Result<(), serde_json::Error> {
        // Deserialization: `detectors` is extracted and removed from `inner`.
        let detectors = DetectorConfig {
            input: HashMap::from([("some_detector".into(), DetectorParams::new())]),
            output: HashMap::new(),
        };
        let messages = vec![Message {
            content: Some(Content::Text("Hi there!".to_string())),
            ..Default::default()
        }];
        let json_request = json!({
            "model": "test",
            "detectors": detectors,
            "messages": messages,
        });
        let request = ChatCompletionsRequest::deserialize(&json_request)?;
        let mut expected_inner = json_request.as_object().unwrap().to_owned();
        expected_inner.remove("detectors").unwrap();
        assert_eq!(
            request,
            ChatCompletionsRequest {
                detectors,
                stream: false,
                model: "test".into(),
                messages: messages.clone(),
                inner: expected_inner,
            }
        );

        // Deserialization with no detectors falls back to the default config.
        let json_request = json!({
            "model": "test",
            "messages": messages,
        });
        let request = ChatCompletionsRequest::deserialize(&json_request)?;
        let expected_inner = json_request.as_object().unwrap().to_owned();
        assert_eq!(
            request,
            ChatCompletionsRequest {
                detectors: DetectorConfig::default(),
                stream: false,
                model: "test".into(),
                messages: messages.clone(),
                inner: expected_inner,
            }
        );

        // Validation errors surface as deserialization errors.
        let result = ChatCompletionsRequest::deserialize(json!({
            "detectors": DetectorConfig::default(),
            "messages": messages,
        }));
        assert!(result.is_err_and(|error| error.to_string() == "`model` is required"));

        let result = ChatCompletionsRequest::deserialize(json!({
            "model": "",
            "detectors": DetectorConfig::default(),
            "messages": Vec::<Message>::default(),
        }));
        assert!(result.is_err_and(|error| error.to_string() == "`model` must not be empty"));

        let result = ChatCompletionsRequest::deserialize(json!({
            "model": "test",
            "detectors": DetectorConfig::default(),
            "messages": Vec::<Message>::default(),
        }));
        assert!(result.is_err_and(|error| error.to_string() == "`messages` must not be empty"));

        let result = ChatCompletionsRequest::deserialize(json!({
            "model": "test",
            "detectors": DetectorConfig::default(),
            "messages": ["invalid"],
        }));
        assert!(result.is_err_and(|error| error.to_string() == "error deserializing `messages`"));

        // Serialization: only the inner map (sans `detectors`) is emitted.
        let serialized_request = serde_json::to_value(request)?;
        assert_eq!(
            serialized_request,
            json!({
                "model": "test",
                "messages": [Message {
                    content: Some(Content::Text("Hi there!".to_string())),
                    role: Role::User,
                    ..Default::default()
                }],
            })
        );

        Ok(())
    }
}
0 commit comments