Skip to content

Commit d933da1

Browse files
authored
fix: (cherry-pick) sglang dsr1 recipes (#3854)
Signed-off-by: hongkuanz <[email protected]>
1 parent 781fa6c commit d933da1

File tree

2 files changed

+102
-70
lines changed

2 files changed

+102
-70
lines changed

recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml

Lines changed: 53 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -45,29 +45,38 @@ spec:
4545
path: /health
4646
port: 9090
4747
periodSeconds: 10
48-
timeoutSeconds: 1800
49-
failureThreshold: 60
48+
timeoutSeconds: 10
49+
failureThreshold: 600
5050
image: my-registry/sglang-wideep-runtime:my-tag
5151
workingDir: /workspace/components/backends/sglang
5252
command:
53-
- /bin/sh
54-
- -c
53+
- python3
54+
- -m
55+
- dynamo.sglang
5556
args:
56-
- >-
57-
exec python3 -m dynamo.sglang
58-
--model-path deepseek-ai/DeepSeek-R1
59-
--served-model-name deepseek-ai/DeepSeek-R1
60-
--tp 16
61-
--dp 16
62-
--enable-dp-attention
63-
--ep-size 16
64-
--trust-remote-code
65-
--skip-tokenizer-init
66-
--disaggregation-mode decode
67-
--disaggregation-transfer-backend nixl
68-
--disaggregation-bootstrap-port 30001
69-
--mem-fraction-static 0.8
70-
--host 0.0.0.0
57+
- --model-path
58+
- deepseek-ai/DeepSeek-R1
59+
- --served-model-name
60+
- deepseek-ai/DeepSeek-R1
61+
- --tp
62+
- "16"
63+
- --dp
64+
- "16"
65+
- --enable-dp-attention
66+
- --ep-size
67+
- "16"
68+
- --trust-remote-code
69+
- --skip-tokenizer-init
70+
- --disaggregation-mode
71+
- decode
72+
- --disaggregation-transfer-backend
73+
- nixl
74+
- --disaggregation-bootstrap-port
75+
- "30001"
76+
- --mem-fraction-static
77+
- "0.75"
78+
- --host
79+
- 0.0.0.0
7180
prefill:
7281
dynamoNamespace: sgl-dsr1-16gpu
7382
componentType: worker
@@ -89,24 +98,32 @@ spec:
8998
path: /health
9099
port: 9090
91100
periodSeconds: 10
92-
timeoutSeconds: 1800
93-
failureThreshold: 60
101+
timeoutSeconds: 10
102+
failureThreshold: 600
94103
image: my-registry/sglang-wideep-runtime:my-tag
95104
workingDir: /workspace/components/backends/sglang
96105
command:
97-
- /bin/sh
98-
- -c
106+
- python3
107+
- -m
108+
- dynamo.sglang
99109
args:
100-
- >-
101-
exec python3 -m dynamo.sglang
102-
--model-path deepseek-ai/DeepSeek-R1
103-
--served-model-name deepseek-ai/DeepSeek-R1
104-
--tp 16
105-
--ep-size 16
106-
--trust-remote-code
107-
--skip-tokenizer-init
108-
--disaggregation-mode prefill
109-
--disaggregation-transfer-backend nixl
110-
--disaggregation-bootstrap-port 30001
111-
--mem-fraction-static 0.8
112-
--host 0.0.0.0
110+
- --model-path
111+
- deepseek-ai/DeepSeek-R1
112+
- --served-model-name
113+
- deepseek-ai/DeepSeek-R1
114+
- --tp
115+
- "16"
116+
- --ep-size
117+
- "16"
118+
- --trust-remote-code
119+
- --skip-tokenizer-init
120+
- --disaggregation-mode
121+
- prefill
122+
- --disaggregation-transfer-backend
123+
- nixl
124+
- --disaggregation-bootstrap-port
125+
- "30001"
126+
- --mem-fraction-static
127+
- "0.75"
128+
- --host
129+
- 0.0.0.0

recipes/deepseek-r1/sglang-wideep/tep8p-dep8d-disagg.yaml

Lines changed: 49 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -43,28 +43,36 @@ spec:
4343
path: /health
4444
port: 9090
4545
periodSeconds: 10
46-
timeoutSeconds: 1800
47-
failureThreshold: 60
46+
timeoutSeconds: 10
47+
failureThreshold: 600
4848
image: my-registry/sglang-wideep-runtime:my-tag
4949
workingDir: /workspace/components/backends/sglang
5050
command:
51-
- /bin/sh
52-
- -c
51+
- python3
52+
- -m
53+
- dynamo.sglang
5354
args:
54-
- >-
55-
exec python3 -m dynamo.sglang
56-
--model-path deepseek-ai/DeepSeek-R1
57-
--served-model-name deepseek-ai/DeepSeek-R1
58-
--tp 8
59-
--dp 8
60-
--enable-dp-attention
61-
--ep-size 8
62-
--trust-remote-code
63-
--skip-tokenizer-init
64-
--disaggregation-mode decode
65-
--disaggregation-transfer-backend nixl
66-
--disaggregation-bootstrap-port 30001
67-
--host 0.0.0.0
55+
- --model-path
56+
- deepseek-ai/DeepSeek-R1
57+
- --served-model-name
58+
- deepseek-ai/DeepSeek-R1
59+
- --tp
60+
- "8"
61+
- --dp
62+
- "8"
63+
- --enable-dp-attention
64+
- --ep-size
65+
- "8"
66+
- --trust-remote-code
67+
- --skip-tokenizer-init
68+
- --disaggregation-mode
69+
- decode
70+
- --disaggregation-transfer-backend
71+
- nixl
72+
- --disaggregation-bootstrap-port
73+
- "30001"
74+
- --host
75+
- 0.0.0.0
6876
prefill:
6977
dynamoNamespace: sgl-dsr1-8gpu
7078
componentType: worker
@@ -84,23 +92,30 @@ spec:
8492
path: /health
8593
port: 9090
8694
periodSeconds: 10
87-
timeoutSeconds: 1800
88-
failureThreshold: 60
95+
timeoutSeconds: 10
96+
failureThreshold: 600
8997
image: my-registry/sglang-wideep-runtime:my-tag
9098
workingDir: /workspace/components/backends/sglang
9199
command:
92-
- /bin/sh
93-
- -c
100+
- python3
101+
- -m
102+
- dynamo.sglang
94103
args:
95-
- >-
96-
exec python3 -m dynamo.sglang
97-
--model-path deepseek-ai/DeepSeek-R1
98-
--served-model-name deepseek-ai/DeepSeek-R1
99-
--tp 8
100-
--ep-size 8
101-
--trust-remote-code
102-
--skip-tokenizer-init
103-
--disaggregation-mode prefill
104-
--disaggregation-transfer-backend nixl
105-
--disaggregation-bootstrap-port 30001
106-
--host 0.0.0.0
104+
- --model-path
105+
- deepseek-ai/DeepSeek-R1
106+
- --served-model-name
107+
- deepseek-ai/DeepSeek-R1
108+
- --tp
109+
- "8"
110+
- --ep-size
111+
- "8"
112+
- --trust-remote-code
113+
- --skip-tokenizer-init
114+
- --disaggregation-mode
115+
- prefill
116+
- --disaggregation-transfer-backend
117+
- nixl
118+
- --disaggregation-bootstrap-port
119+
- "30001"
120+
- --host
121+
- 0.0.0.0

0 commit comments

Comments (0)