diff --git a/crates/openfang-kernel/src/kernel.rs b/crates/openfang-kernel/src/kernel.rs index 8f59414c97..deb543dc06 100644 --- a/crates/openfang-kernel/src/kernel.rs +++ b/crates/openfang-kernel/src/kernel.rs @@ -990,10 +990,27 @@ impl OpenFangKernel { configured_model.as_str() }; let api_key_env = config.memory.embedding_api_key_env.as_deref().unwrap_or(""); + // URL resolution (highest priority first): + // 1. [memory] embedding_base_url — explicit embedding-specific override + // 2. [provider_urls] — global per-provider override + // 3. [default_model] base_url — when embedding_provider matches the LLM provider let custom_url = config - .provider_urls - .get(provider.as_str()) - .map(|s| s.as_str()); + .memory + .embedding_base_url + .as_deref() + .or_else(|| { + config + .provider_urls + .get(provider.as_str()) + .map(|s| s.as_str()) + }) + .or_else(|| { + if config.default_model.provider == *provider { + config.default_model.base_url.as_deref() + } else { + None + } + }); match create_embedding_driver(provider, model, api_key_env, custom_url) { Ok(d) => { info!(provider = %provider, model = %model, "Embedding driver configured from memory config"); @@ -1025,7 +1042,18 @@ impl OpenFangKernel { } else { configured_model.as_str() }; - let custom_url = config.provider_urls.get(*provider).map(|s| s.as_str()); + let custom_url = config + .memory + .embedding_base_url + .as_deref() + .or_else(|| config.provider_urls.get(*provider).map(|s| s.as_str())) + .or_else(|| { + if config.default_model.provider == *provider { + config.default_model.base_url.as_deref() + } else { + None + } + }); match create_embedding_driver(provider, model, env_var, custom_url) { Ok(d) => { info!(provider = %provider, model = %model, "Embedding driver auto-detected via {}", env_var); @@ -1052,7 +1080,18 @@ impl OpenFangKernel { } else { configured_model.as_str() }; - let custom_url = config.provider_urls.get(*provider).map(|s| s.as_str()); + let custom_url = config + .memory + 
.embedding_base_url + .as_deref() + .or_else(|| config.provider_urls.get(*provider).map(|s| s.as_str())) + .or_else(|| { + if config.default_model.provider == *provider { + config.default_model.base_url.as_deref() + } else { + None + } + }); match create_embedding_driver(provider, model, "", custom_url) { Ok(d) => { info!(provider = %provider, model = %model, "Embedding driver auto-detected: {} (local)", provider); diff --git a/crates/openfang-runtime/src/embedding.rs b/crates/openfang-runtime/src/embedding.rs index c3245d879c..fcb2ee4143 100644 --- a/crates/openfang-runtime/src/embedding.rs +++ b/crates/openfang-runtime/src/embedding.rs @@ -6,8 +6,8 @@ use async_trait::async_trait; use openfang_types::model_catalog::{ - FIREWORKS_BASE_URL, GROQ_BASE_URL, LMSTUDIO_BASE_URL, MISTRAL_BASE_URL, OLLAMA_BASE_URL, - OPENAI_BASE_URL, TOGETHER_BASE_URL, VLLM_BASE_URL, + COHERE_BASE_URL, FIREWORKS_BASE_URL, GROQ_BASE_URL, LMSTUDIO_BASE_URL, MISTRAL_BASE_URL, + OLLAMA_BASE_URL, OPENAI_BASE_URL, TOGETHER_BASE_URL, VLLM_BASE_URL, }; use serde::{Deserialize, Serialize}; use tracing::{debug, warn}; @@ -189,34 +189,14 @@ pub fn create_embedding_driver( let base_url = custom_base_url .filter(|u| !u.is_empty()) - .map(|u| { - let trimmed = u.trim_end_matches('/'); - // All OpenAI-compatible embedding providers need /v1 in the path. - // If the user supplied a bare host URL (e.g. "http://192.168.0.1:11434"), - // append /v1 so the final request hits {base}/v1/embeddings. 
- let needs_v1 = matches!( - provider, - "openai" - | "groq" - | "together" - | "fireworks" - | "mistral" - | "ollama" - | "vllm" - | "lmstudio" - ); - if needs_v1 && !trimmed.ends_with("/v1") { - format!("{trimmed}/v1") - } else { - trimmed.to_string() - } - }) + .map(|u| u.trim_end_matches('/').to_string()) .unwrap_or_else(|| match provider { "openai" => OPENAI_BASE_URL.to_string(), "groq" => GROQ_BASE_URL.to_string(), "together" => TOGETHER_BASE_URL.to_string(), "fireworks" => FIREWORKS_BASE_URL.to_string(), "mistral" => MISTRAL_BASE_URL.to_string(), + "cohere" => COHERE_BASE_URL.to_string(), "ollama" => OLLAMA_BASE_URL.to_string(), "vllm" => VLLM_BASE_URL.to_string(), "lmstudio" => LMSTUDIO_BASE_URL.to_string(), @@ -395,7 +375,7 @@ mod tests { #[test] fn test_create_embedding_driver_custom_url_without_v1() { - // Custom URL missing /v1 should get it appended for known providers + // Custom URL is used as-is (caller is responsible for the full path) let driver = create_embedding_driver( "ollama", "nomic-embed-text", @@ -407,7 +387,7 @@ mod tests { #[test] fn test_create_embedding_driver_custom_url_trailing_slash() { - // Trailing slash should be trimmed before appending /v1 + // Trailing slash is trimmed from the custom URL let driver = create_embedding_driver( "ollama", "nomic-embed-text", @@ -416,4 +396,58 @@ mod tests { ); assert!(driver.is_ok()); } + + // ── Fix #1212: base_url override honored for openai embedding provider ── + + #[test] + fn test_create_embedding_driver_openai_custom_base_url() { + // [memory] embedding_base_url or [provider_urls] openai reaches the driver + // via custom_base_url. The URL must be accepted without modification. 
+ let driver = create_embedding_driver( + "openai", + "text-embedding-3-small", + "", + Some("https://my-proxy.internal/v1"), + ); + assert!(driver.is_ok()); + assert_eq!(driver.unwrap().dimensions(), 1536); + } + + #[test] + fn test_create_embedding_driver_openai_custom_base_url_no_v1() { + // When the caller passes a bare host URL, it is used verbatim — + // the driver no longer auto-appends /v1 (the caller owns the full URL). + let driver = create_embedding_driver( + "openai", + "text-embedding-3-small", + "", + Some("https://my-proxy.internal"), + ); + assert!(driver.is_ok()); + } + + #[test] + fn test_create_embedding_driver_cohere_custom_url() { + // Cohere is no longer restricted by a hard-coded allowlist; + // a custom URL is accepted for any provider name. + let driver = create_embedding_driver( + "cohere", + "text-embedding-3-small", + "", + Some("https://cohere-proxy.internal/v1"), + ); + assert!(driver.is_ok()); + } + + #[test] + fn test_create_embedding_driver_unknown_provider_custom_url() { + // Unknown provider names with a custom URL are accepted. + let driver = create_embedding_driver( + "my-custom-provider", + "text-embedding-3-small", + "", + Some("https://custom.internal/v1"), + ); + assert!(driver.is_ok()); + } } diff --git a/crates/openfang-types/src/config.rs b/crates/openfang-types/src/config.rs index 25df9b0059..b176c603e7 100644 --- a/crates/openfang-types/src/config.rs +++ b/crates/openfang-types/src/config.rs @@ -1728,6 +1728,11 @@ pub struct MemoryConfig { /// Environment variable name for the embedding API key. #[serde(default)] pub embedding_api_key_env: Option<String>, + /// Base URL override for the embedding API endpoint. + /// Takes priority over `[provider_urls]` and `[default_model] base_url`. + /// e.g. `"http://my-proxy.internal/v1"` or `"http://localhost:11434/v1"` + #[serde(default)] + pub embedding_base_url: Option<String>, /// How often to run memory consolidation (hours). 0 = disabled. 
#[serde(default = "default_consolidation_interval")] pub consolidation_interval_hours: u64, @@ -1760,6 +1765,7 @@ impl Default for MemoryConfig { decay_rate: 0.1, embedding_provider: None, embedding_api_key_env: None, + embedding_base_url: None, consolidation_interval_hours: default_consolidation_interval(), backend: default_memory_backend(), http_url: None,