|
16 | 16 | } |
17 | 17 | ], |
18 | 18 | "shape_details": { |
19 | | - "available": true, |
20 | | - "core_count": 112, |
| 19 | + "available": false, |
| 20 | + "core_count": null, |
21 | 21 | "gpu_specs": { |
22 | | - "cpu_count": 112, |
| 22 | + "cpu_count": 64, |
23 | 23 | "cpu_memory_in_gbs": 1024, |
24 | 24 | "gpu_count": 4, |
25 | | - "gpu_memory_in_gbs": 192, |
26 | | - "gpu_type": "L40S", |
| 25 | + "gpu_memory_in_gbs": 96, |
| 26 | + "gpu_type": "A10", |
27 | 27 | "quantization": [ |
28 | 28 | "awq", |
29 | 29 | "gptq", |
30 | 30 | "marlin", |
31 | | - "fp8", |
32 | 31 | "int8", |
33 | 32 | "bitblas", |
34 | 33 | "aqlm", |
|
37 | 36 | "gguf" |
38 | 37 | ], |
39 | 38 | "ranking": { |
40 | | - "cost": 60, |
41 | | - "performance": 80 |
| 39 | + "cost": 50, |
| 40 | + "performance": 50 |
42 | 41 | } |
43 | 42 | }, |
44 | | - "memory_in_gbs": 1024, |
45 | | - "name": "BM.GPU.L40S-NC.4", |
| 43 | + "memory_in_gbs": null, |
| 44 | + "name": "BM.GPU.A10.4", |
46 | 45 | "shape_series": "GPU" |
47 | 46 | } |
48 | 47 | }, |
|
61 | 60 | } |
62 | 61 | ], |
63 | 62 | "shape_details": { |
64 | | - "available": true, |
65 | | - "core_count": 64, |
| 63 | + "available": false, |
| 64 | + "core_count": null, |
66 | 65 | "gpu_specs": { |
67 | | - "cpu_count": 64, |
| 66 | + "cpu_count": 112, |
68 | 67 | "cpu_memory_in_gbs": 1024, |
69 | 68 | "gpu_count": 4, |
70 | | - "gpu_memory_in_gbs": 96, |
71 | | - "gpu_type": "A10", |
| 69 | + "gpu_memory_in_gbs": 192, |
| 70 | + "gpu_type": "L40S", |
72 | 71 | "quantization": [ |
73 | 72 | "awq", |
74 | 73 | "gptq", |
75 | 74 | "marlin", |
| 75 | + "fp8", |
76 | 76 | "int8", |
77 | 77 | "bitblas", |
78 | 78 | "aqlm", |
|
81 | 81 | "gguf" |
82 | 82 | ], |
83 | 83 | "ranking": { |
84 | | - "cost": 50, |
85 | | - "performance": 50 |
| 84 | + "cost": 60, |
| 85 | + "performance": 80 |
86 | 86 | } |
87 | 87 | }, |
88 | | - "memory_in_gbs": 1024, |
89 | | - "name": "BM.GPU.A10.4", |
| 88 | + "memory_in_gbs": null, |
| 89 | + "name": "BM.GPU.L40S-NC.4", |
90 | 90 | "shape_series": "GPU" |
91 | 91 | } |
92 | 92 | }, |
|
95 | 95 | { |
96 | 96 | "deployment_params": { |
97 | 97 | "env_var": null, |
98 | | - "max_model_len": null, |
99 | | - "params": "", |
| 98 | + "max_model_len": 65536, |
| 99 | + "params": "--max-model-len 65536", |
100 | 100 | "quantization": null, |
101 | 101 | "weight_dtype": null |
102 | 102 | }, |
103 | 103 | "model_details": null, |
104 | | - "recommendation": "No override PARAMS and ENV variables needed. \n\nModel fits well within the allowed compute shape." |
| 104 | + "recommendation": "Model fits well within the allowed compute shape." |
105 | 105 | } |
106 | 106 | ], |
107 | 107 | "shape_details": { |
108 | | - "available": true, |
109 | | - "core_count": 30, |
| 108 | + "available": false, |
| 109 | + "core_count": null, |
110 | 110 | "gpu_specs": { |
111 | | - "cpu_count": 30, |
112 | | - "cpu_memory_in_gbs": 480, |
113 | | - "gpu_count": 2, |
114 | | - "gpu_memory_in_gbs": 48, |
| 111 | + "cpu_count": 15, |
| 112 | + "cpu_memory_in_gbs": 240, |
| 113 | + "gpu_count": 1, |
| 114 | + "gpu_memory_in_gbs": 24, |
115 | 115 | "gpu_type": "A10", |
116 | 116 | "quantization": [ |
117 | 117 | "awq", |
|
125 | 125 | "gguf" |
126 | 126 | ], |
127 | 127 | "ranking": { |
128 | | - "cost": 40, |
129 | | - "performance": 40 |
| 128 | + "cost": 20, |
| 129 | + "performance": 30 |
130 | 130 | } |
131 | 131 | }, |
132 | | - "memory_in_gbs": 480, |
133 | | - "name": "VM.GPU.A10.2", |
| 132 | + "memory_in_gbs": null, |
| 133 | + "name": "VM.GPU.A10.1", |
134 | 134 | "shape_series": "GPU" |
135 | 135 | } |
136 | 136 | }, |
|
139 | 139 | { |
140 | 140 | "deployment_params": { |
141 | 141 | "env_var": null, |
142 | | - "max_model_len": 65536, |
143 | | - "params": "--max-model-len 65536", |
| 142 | + "max_model_len": null, |
| 143 | + "params": "", |
144 | 144 | "quantization": null, |
145 | 145 | "weight_dtype": null |
146 | 146 | }, |
147 | 147 | "model_details": null, |
148 | | - "recommendation": "Model fits well within the allowed compute shape." |
| 148 | + "recommendation": "No override PARAMS and ENV variables needed. \n\nModel fits well within the allowed compute shape." |
149 | 149 | } |
150 | 150 | ], |
151 | 151 | "shape_details": { |
152 | | - "available": true, |
153 | | - "core_count": 15, |
| 152 | + "available": false, |
| 153 | + "core_count": null, |
154 | 154 | "gpu_specs": { |
155 | | - "cpu_count": 15, |
156 | | - "cpu_memory_in_gbs": 240, |
157 | | - "gpu_count": 1, |
158 | | - "gpu_memory_in_gbs": 24, |
| 155 | + "cpu_count": 30, |
| 156 | + "cpu_memory_in_gbs": 480, |
| 157 | + "gpu_count": 2, |
| 158 | + "gpu_memory_in_gbs": 48, |
159 | 159 | "gpu_type": "A10", |
160 | 160 | "quantization": [ |
161 | 161 | "awq", |
|
169 | 169 | "gguf" |
170 | 170 | ], |
171 | 171 | "ranking": { |
172 | | - "cost": 20, |
173 | | - "performance": 30 |
| 172 | + "cost": 40, |
| 173 | + "performance": 40 |
174 | 174 | } |
175 | 175 | }, |
176 | | - "memory_in_gbs": 240, |
177 | | - "name": "VM.GPU.A10.1", |
| 176 | + "memory_in_gbs": null, |
| 177 | + "name": "VM.GPU.A10.2", |
178 | 178 | "shape_series": "GPU" |
179 | 179 | } |
180 | 180 | } |
|
0 commit comments