Skip to content

Commit 14be0ec

Browse files
committed
apps: fix llamastack and adapt agent
1 parent 70a0fe0 commit 14be0ec

File tree

5 files changed: +30 additions, -112 deletions

clusters/homelab/apps/llm/llama-stack/agent/agent.py

Lines changed: 10 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -192,59 +192,6 @@ def get_models():
192192
data=[ModelInfo(id=alias) for alias in MODEL_ALIAS_MAP.keys()]
193193
)
194194

195-
@app.post("/v1/completions")
def completions(request: CompletionRequest):
    """OpenAI-compatible /v1/completions endpoint backed by the llama-stack agent.

    Resolves the requested public model alias to the internal model id, sends the
    prompt(s) to the agent as user messages, and wraps the agent's reply in an
    OpenAI text-completion response envelope.

    Raises:
        HTTPException: 404 for an unknown model alias; 500 when the agent is not
            initialized, the reply cannot be extracted, or the reply is empty.
    """
    internal_model = MODEL_ALIAS_MAP.get(request.model)
    if not internal_model:
        raise HTTPException(status_code=404, detail="Model not found")

    if internal_model != MODEL_ID or not AGENT_INSTANCE or not AGENT_SESSION_ID:
        raise HTTPException(status_code=500, detail="Agent not initialized")

    # The OpenAI API allows `prompt` to be a single string or a list of strings.
    prompts = request.prompt if isinstance(request.prompt, list) else [request.prompt]

    response = AGENT_INSTANCE.create_turn(
        messages=[{"role": "user", "content": p} for p in prompts],
        session_id=AGENT_SESSION_ID,
        stream=False,
    )
    print(f"Response from agent: {response}")

    content = ""
    try:
        # Prefer the final output message; fall back to the first step's raw
        # model response for agent versions that don't expose output_message.
        if hasattr(response, "output_message"):
            content = response.output_message.content
        elif hasattr(response, "steps") and len(response.steps) > 0:
            step = response.steps[0]
            if hasattr(step, "api_model_response"):
                content = step.api_model_response.content
    except Exception as e:
        # BUG FIX: the original also raised HTTPException("No assistant
        # response received") inside this try, which this broad handler then
        # swallowed and re-reported as "Error processing the log", masking the
        # real cause. Only genuine extraction failures are converted to a 500
        # here; the empty-content check happens exactly once, below.
        print(f"Error while processing the log: {e}")
        raise HTTPException(status_code=500, detail="Error processing the log")

    if not content:
        raise HTTPException(status_code=500, detail="No assistant response received")

    return {
        # NOTE(review): static id kept from the original; OpenAI clients
        # usually expect a unique id per completion — confirm before relying
        # on it downstream.
        "id": "cmpl-1234",
        "object": "text_completion",
        "created": int(time.time()),
        "model": MODEL_ID,
        "choices": [
            {
                "text": content.strip(),
                "index": 0,
                "logprobs": None,
                "finish_reason": "stop",
            }
        ],
    }
247-
248195
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
249196
def chat_completions(request: ChatCompletionRequest, raw_request: Request):
250197
with tracer.start_as_current_span("agent.chat_completions") as span:
@@ -260,10 +207,19 @@ def chat_completions(request: ChatCompletionRequest, raw_request: Request):
260207
raise HTTPException(status_code=500, detail="Agent not initialized")
261208

262209
try:
210+
logger.info(f"All incoming messages: {[msg.dict() for msg in request.messages]}")
211+
agent_messages = [
212+
{"role": "user", "content": msg.content}
213+
for msg in request.messages
214+
if msg.role == "user"
215+
]
216+
217+
if not agent_messages:
218+
raise HTTPException(status_code=400, detail="No user message found.")
263219
# Trace the agent's create_turn method
264220
with tracer.start_as_current_span("agent.create_turn"):
265221
response = AGENT_INSTANCE.create_turn(
266-
messages=[msg.dict() for msg in request.messages],
222+
messages=agent_messages,
267223
session_id=AGENT_SESSION_ID,
268224
stream=False,
269225
)

clusters/homelab/apps/llm/llama-stack/agent/cm-agent.yaml

Lines changed: 10 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -195,59 +195,6 @@ data:
195195
data=[ModelInfo(id=alias) for alias in MODEL_ALIAS_MAP.keys()]
196196
)
197197
198-
@app.post("/v1/completions")
def completions(request: CompletionRequest):
    """OpenAI-compatible /v1/completions endpoint backed by the llama-stack agent.

    Resolves the requested public model alias to the internal model id, sends the
    prompt(s) to the agent as user messages, and wraps the agent's reply in an
    OpenAI text-completion response envelope.

    Raises:
        HTTPException: 404 for an unknown model alias; 500 when the agent is not
            initialized, the reply cannot be extracted, or the reply is empty.
    """
    internal_model = MODEL_ALIAS_MAP.get(request.model)
    if not internal_model:
        raise HTTPException(status_code=404, detail="Model not found")

    if internal_model != MODEL_ID or not AGENT_INSTANCE or not AGENT_SESSION_ID:
        raise HTTPException(status_code=500, detail="Agent not initialized")

    # The OpenAI API allows `prompt` to be a single string or a list of strings.
    prompts = request.prompt if isinstance(request.prompt, list) else [request.prompt]

    response = AGENT_INSTANCE.create_turn(
        messages=[{"role": "user", "content": p} for p in prompts],
        session_id=AGENT_SESSION_ID,
        stream=False,
    )
    print(f"Response from agent: {response}")

    content = ""
    try:
        # Prefer the final output message; fall back to the first step's raw
        # model response for agent versions that don't expose output_message.
        if hasattr(response, "output_message"):
            content = response.output_message.content
        elif hasattr(response, "steps") and len(response.steps) > 0:
            step = response.steps[0]
            if hasattr(step, "api_model_response"):
                content = step.api_model_response.content
    except Exception as e:
        # BUG FIX: the original also raised HTTPException("No assistant
        # response received") inside this try, which this broad handler then
        # swallowed and re-reported as "Error processing the log", masking the
        # real cause. Only genuine extraction failures are converted to a 500
        # here; the empty-content check happens exactly once, below.
        print(f"Error while processing the log: {e}")
        raise HTTPException(status_code=500, detail="Error processing the log")

    if not content:
        raise HTTPException(status_code=500, detail="No assistant response received")

    return {
        # NOTE(review): static id kept from the original; OpenAI clients
        # usually expect a unique id per completion — confirm before relying
        # on it downstream.
        "id": "cmpl-1234",
        "object": "text_completion",
        "created": int(time.time()),
        "model": MODEL_ID,
        "choices": [
            {
                "text": content.strip(),
                "index": 0,
                "logprobs": None,
                "finish_reason": "stop",
            }
        ],
    }
250-
251198
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
252199
def chat_completions(request: ChatCompletionRequest, raw_request: Request):
253200
with tracer.start_as_current_span("agent.chat_completions") as span:
@@ -263,10 +210,19 @@ data:
263210
raise HTTPException(status_code=500, detail="Agent not initialized")
264211
265212
try:
213+
logger.info(f"All incoming messages: {[msg.dict() for msg in request.messages]}")
214+
agent_messages = [
215+
{"role": "user", "content": msg.content}
216+
for msg in request.messages
217+
if msg.role == "user"
218+
]
219+
220+
if not agent_messages:
221+
raise HTTPException(status_code=400, detail="No user message found.")
266222
# Trace the agent's create_turn method
267223
with tracer.start_as_current_span("agent.create_turn"):
268224
response = AGENT_INSTANCE.create_turn(
269-
messages=[msg.dict() for msg in request.messages],
225+
messages=agent_messages,
270226
session_id=AGENT_SESSION_ID,
271227
stream=False,
272228
)

clusters/homelab/apps/llm/llama-stack/agent/deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ spec:
2525
- -c
2626
- |
2727
pip install \
28-
uvicorn llama-stack-client fastapi pydantic\
28+
uvicorn llama-stack-client fire fastapi pydantic\
2929
opentelemetry-instrumentation-fastapi \
3030
'opentelemetry-sdk>=1.26.0,<1.27.0' \
3131
'opentelemetry-api>=1.26.0,<1.27.0' \

clusters/homelab/apps/llm/llama-stack/configmap.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ data:
2727
url: ${env.VLLM_URL:http://localhost:8000/v1}
2828
max_tokens: ${env.VLLM_MAX_TOKENS:2048}
2929
api_token: ${env.VLLM_API_TOKEN:fake}
30-
tls_verify: ${env.VLLM_TLS_VERIFY:true}
30+
tls_verify: false
3131
- provider_id: sentence-transformers
3232
provider_type: inline::sentence-transformers
3333
config: {}
@@ -52,6 +52,10 @@ data:
5252
type: sqlite
5353
namespace: null
5454
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
55+
responses_store:
56+
type: sqlite
57+
namespace: null
58+
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db
5559
eval:
5660
- provider_id: meta-reference
5761
provider_type: inline::meta-reference
@@ -140,5 +144,9 @@ data:
140144
provider_id: rag-runtime
141145
- toolgroup_id: builtin::wolfram_alpha
142146
provider_id: wolfram-alpha
147+
- toolgroup_id: mcp::opentelemetry
148+
provider_id: model-context-protocol
149+
mcp_endpoint:
150+
uri: "http://mcp-otel-operator:8000/sse"
143151
server:
144152
port: 8321

clusters/homelab/apps/llm/llama-stack/deployment.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ spec:
3434
terminationMessagePath: /dev/termination-log
3535
name: llamastack
3636
env:
37-
- name: VLLM_TLS_VERIFY
38-
value: 'false'
3937
- name: VLLM_MAX_TOKENS
4038
value: '2048'
4139
- name: VLLM_URL

Comments (0)