2 changes: 2 additions & 0 deletions litellm/__init__.py

@@ -231,6 +231,7 @@
 anthropic_key: Optional[str] = None
 replicate_key: Optional[str] = None
 bytez_key: Optional[str] = None
+zai_key: Optional[str] = None
 cohere_key: Optional[str] = None
 infinity_key: Optional[str] = None
 clarifai_key: Optional[str] = None
@@ -1059,6 +1060,7 @@ def add_known_models():
 ]
 
 from .llms.bytez.chat.transformation import BytezChatConfig
+from .llms.zai.chat.transformation import ZaiChatConfig
 from .llms.custom_llm import CustomLLM
 from .llms.bedrock.chat.converse_transformation import AmazonConverseConfig
 from .llms.openai_like.chat.handler import OpenAILikeChatConfig
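Note: with zai_key and ZaiChatConfig registered, the provider should be reachable through the usual completion entrypoint. A minimal sketch, assuming the model name "zai/glm-4.5" and direct key assignment (neither is confirmed by this diff):

    import litellm

    # Module-level key added in this PR; an env var such as ZAI_API_KEY
    # may also be supported, but that mapping is not shown here.
    litellm.zai_key = "sk-..."  # hypothetical credential

    response = litellm.completion(
        model="zai/glm-4.5",  # the "zai/" prefix selects the new provider
        messages=[{"role": "user", "content": "Hello from litellm!"}],
    )
    print(response.choices[0].message.content)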
61 changes: 37 additions & 24 deletions litellm/constants.py

@@ -17,7 +17,9 @@
 DEFAULT_NUM_WORKERS_LITELLM_PROXY = int(
     os.getenv("DEFAULT_NUM_WORKERS_LITELLM_PROXY", 1)
 )
-DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE = int(os.getenv("DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE", 1))
+DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE = int(
+    os.getenv("DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE", 1)
+)
 DEFAULT_SQS_BATCH_SIZE = int(os.getenv("DEFAULT_SQS_BATCH_SIZE", 512))
 SQS_SEND_MESSAGE_ACTION = "SendMessage"
 SQS_API_VERSION = "2012-11-05"
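Note: both the old and new forms follow this file's standard pattern: read an environment variable, fall back to a default, and coerce to int. A minimal sketch of the override behavior (values illustrative):

    import os

    # Unset: os.getenv returns the int default and int() passes it through.
    # Set: os.getenv returns a string and int() parses it.
    os.environ["DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE"] = "5"
    threshold = int(os.getenv("DYNAMIC_RATE_LIMIT_ERROR_THRESHOLD_PER_MINUTE", 1))
    print(threshold)  # 5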
@@ -107,22 +109,21 @@
 DEFAULT_SSL_CIPHERS = os.getenv(
     "LITELLM_SSL_CIPHERS",
     # Priority 1: TLS 1.3 ciphers (fastest, ~50ms handshake)
-    "TLS_AES_256_GCM_SHA384:" # Fastest observed in testing
-    "TLS_AES_128_GCM_SHA256:" # Slightly faster than 256-bit
-    "TLS_CHACHA20_POLY1305_SHA256:" # Fast on ARM/mobile
+    "TLS_AES_256_GCM_SHA384:"  # Fastest observed in testing
+    "TLS_AES_128_GCM_SHA256:"  # Slightly faster than 256-bit
+    "TLS_CHACHA20_POLY1305_SHA256:"  # Fast on ARM/mobile
     # Priority 2: TLS 1.2 ECDHE+GCM (fast, ~100ms handshake, widely supported)
     "ECDHE-RSA-AES256-GCM-SHA384:"
     "ECDHE-RSA-AES128-GCM-SHA256:"
     "ECDHE-ECDSA-AES256-GCM-SHA384:"
     "ECDHE-ECDSA-AES128-GCM-SHA256:"
     # Priority 3: Additional modern ciphers (good balance)
-    "ECDHE-RSA-CHACHA20-POLY1305:"
-    "ECDHE-ECDSA-CHACHA20-POLY1305:"
+    "ECDHE-RSA-CHACHA20-POLY1305:" "ECDHE-ECDSA-CHACHA20-POLY1305:"
     # Priority 4: Widely compatible fallbacks (slower but universally supported)
-    "ECDHE-RSA-AES256-SHA384:" # Common fallback
-    "ECDHE-RSA-AES128-SHA256:" # Very widely supported
-    "AES256-GCM-SHA384:" # Non-PFS fallback (compatibility)
-    "AES128-GCM-SHA256", # Last resort (maximum compatibility)
+    "ECDHE-RSA-AES256-SHA384:"  # Common fallback
+    "ECDHE-RSA-AES128-SHA256:"  # Very widely supported
+    "AES256-GCM-SHA384:"  # Non-PFS fallback (compatibility)
+    "AES128-GCM-SHA256",  # Last resort (maximum compatibility)
 )
 
 ########### v2 Architecture constants for managing writing updates to the database ###########
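Note: DEFAULT_SSL_CIPHERS is a standard colon-separated OpenSSL cipher list, so it can be handed straight to Python's ssl module. A minimal sketch, assuming a plain client context (litellm's internal wiring is outside this diff); set_ciphers() governs only the TLS 1.2 suites, while the TLS_AES_*/TLS_CHACHA20_* names are TLS 1.3 suites that OpenSSL manages separately:

    import ssl

    from litellm.constants import DEFAULT_SSL_CIPHERS

    # Apply the preferred cipher order to a client-side TLS context; the
    # TLS 1.3 names in the string are ignored by set_ciphers(), so the
    # ECDHE/AES-GCM entries are what this call actually selects.
    ctx = ssl.create_default_context()
    ctx.set_ciphers(DEFAULT_SSL_CIPHERS)

    # Inspect the enabled TLS 1.2 suites, in priority order.
    print([c["name"] for c in ctx.get_ciphers()])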
@@ -288,6 +289,7 @@
"openai",
"openai_like",
"bytez",
"zai",
"xai",
"custom_openai",
"text-completion-openai",
@@ -366,7 +368,8 @@
"vercel_ai_gateway",
"wandb",
"ovhcloud",
"lemonade"
"lemonade",
"zai",
]

LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [
@@ -625,7 +628,7 @@
"clarifai/qwen.qwenLM.Qwen3-14B",
"clarifai/qwen.qwenLM.QwQ-32B-AWQ",
"clarifai/anthropic.completion.claude-3_5-haiku",
"clarifai/anthropic.completion.claude-3_7-sonnet",
"clarifai/anthropic.completion.claude-3_7-sonnet",
]
)

@@ -791,28 +794,22 @@
     # openai models
     "openai/gpt-oss-120b",
     "openai/gpt-oss-20b",
-
     # zai-org models
     "zai-org/GLM-4.5",
-
     # Qwen models
     "Qwen/Qwen3-235B-A22B-Instruct-2507",
     "Qwen/Qwen3-Coder-480B-A35B-Instruct",
     "Qwen/Qwen3-235B-A22B-Thinking-2507",
-
     # moonshotai
     "moonshotai/Kimi-K2-Instruct",
-
     # meta models
     "meta-llama/Llama-3.1-8B-Instruct",
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-
     # deepseek-ai
     "deepseek-ai/DeepSeek-V3.1",
     "deepseek-ai/DeepSeek-R1-0528",
     "deepseek-ai/DeepSeek-V3-0324",
-
     # microsoft
     "microsoft/Phi-4-mini-instruct",
 ]
@@ -1026,7 +1023,9 @@

 # Key Rotation Constants
 LITELLM_KEY_ROTATION_ENABLED = os.getenv("LITELLM_KEY_ROTATION_ENABLED", "false")
-LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS = int(os.getenv("LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS", 86400)) # 24 hours default
+LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS = int(
+    os.getenv("LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS", 86400)
+)  # 24 hours default
 UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
 LITELLM_PROXY_ADMIN_NAME = "default_user_id"
 
@@ -1054,14 +1053,28 @@
 PROXY_BUDGET_RESCHEDULER_MAX_TIME = int(
     os.getenv("PROXY_BUDGET_RESCHEDULER_MAX_TIME", 605)
 )
-PROXY_BATCH_WRITE_AT = int(os.getenv("PROXY_BATCH_WRITE_AT", 10)) # in seconds, increased from 10
+PROXY_BATCH_WRITE_AT = int(
+    os.getenv("PROXY_BATCH_WRITE_AT", 10)
+)  # in seconds, increased from 10
 
 # APScheduler Configuration - MEMORY LEAK FIX
 # These settings prevent memory leaks in APScheduler's normalize() and _apply_jitter() functions
-APSCHEDULER_COALESCE = os.getenv("APSCHEDULER_COALESCE", "True").lower() in ["true", "1"] # collapse many missed runs into one
-APSCHEDULER_MISFIRE_GRACE_TIME = int(os.getenv("APSCHEDULER_MISFIRE_GRACE_TIME", 3600)) # ignore runs older than 1 hour (was 120)
-APSCHEDULER_MAX_INSTANCES = int(os.getenv("APSCHEDULER_MAX_INSTANCES", 1)) # prevent concurrent job instances
-APSCHEDULER_REPLACE_EXISTING = os.getenv("APSCHEDULER_REPLACE_EXISTING", "True").lower() in ["true", "1"] # always replace existing jobs
+APSCHEDULER_COALESCE = os.getenv("APSCHEDULER_COALESCE", "True").lower() in [
+    "true",
+    "1",
+]  # collapse many missed runs into one
+APSCHEDULER_MISFIRE_GRACE_TIME = int(
+    os.getenv("APSCHEDULER_MISFIRE_GRACE_TIME", 3600)
+)  # ignore runs older than 1 hour (was 120)
+APSCHEDULER_MAX_INSTANCES = int(
+    os.getenv("APSCHEDULER_MAX_INSTANCES", 1)
+)  # prevent concurrent job instances
+APSCHEDULER_REPLACE_EXISTING = os.getenv(
+    "APSCHEDULER_REPLACE_EXISTING", "True"
+).lower() in [
+    "true",
+    "1",
+]  # always replace existing jobs
 
 DEFAULT_HEALTH_CHECK_INTERVAL = int(
     os.getenv("DEFAULT_HEALTH_CHECK_INTERVAL", 300)
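Note: these constants line up with APScheduler's job-default options. A hedged sketch of how they could be fed to a scheduler follows (the proxy's actual wiring is outside this diff; the health_check job is hypothetical, and replace_existing is a per-add_job argument rather than a job default):

    from apscheduler.schedulers.asyncio import AsyncIOScheduler

    from litellm.constants import (
        APSCHEDULER_COALESCE,
        APSCHEDULER_MAX_INSTANCES,
        APSCHEDULER_MISFIRE_GRACE_TIME,
        APSCHEDULER_REPLACE_EXISTING,
    )

    # coalesce folds a backlog of missed runs into one execution and
    # misfire_grace_time discards runs more than an hour late instead of
    # replaying them, capping the missed-run bookkeeping that leaked
    # memory in normalize()/_apply_jitter().
    scheduler = AsyncIOScheduler(
        job_defaults={
            "coalesce": APSCHEDULER_COALESCE,
            "max_instances": APSCHEDULER_MAX_INSTANCES,
            "misfire_grace_time": APSCHEDULER_MISFIRE_GRACE_TIME,
        }
    )

    def health_check() -> None:  # hypothetical periodic job
        ...

    scheduler.add_job(
        health_check,
        "interval",
        seconds=300,
        id="health_check",
        replace_existing=APSCHEDULER_REPLACE_EXISTING,
    )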
2 changes: 2 additions & 0 deletions litellm/litellm_core_utils/get_llm_provider_logic.py

@@ -369,6 +369,8 @@ def get_llm_provider(  # noqa: PLR0915
     # bytez models
     elif model.startswith("bytez/"):
         custom_llm_provider = "bytez"
+    elif model.startswith("zai/"):
+        custom_llm_provider = "zai"
     elif model.startswith("lemonade/"):
         custom_llm_provider = "lemonade"
     elif model.startswith("heroku/"):
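Note: the new branch mirrors the bytez/ handling directly above it, pinning the provider from the model-name prefix. A quick hedged check via litellm's public helper (model name illustrative):

    from litellm import get_llm_provider

    # Returns (model, provider, dynamic_api_key, api_base).
    model, provider, _api_key, _api_base = get_llm_provider(model="zai/glm-4.5")
    print(provider)  # expected: "zai"
    print(model)  # expected: the provider-side name, e.g. "glm-4.5"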