@@ -25,9 +25,7 @@ import type { ContainerProviderConnection, MountConfig } from '@podman-desktop/a
 import * as images from '../../assets/inference-images.json';
 import { LABEL_INFERENCE_SERVER } from '../../utils/inferenceUtils';
 import { DISABLE_SELINUX_LABEL_SECURITY_OPTION } from '../../utils/utils';
-import { basename, dirname } from 'node:path';
-import { join as joinposix } from 'node:path/posix';
-import { getLocalModelFile } from '../../utils/modelsUtils';
+import { getHuggingFaceModelMountInfo } from '../../utils/modelsUtils';
 import { SECOND } from './LlamaCppPython';
 
 export class VLLM extends InferenceProvider {
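The three path-parsing imports are replaced by a single helper. Its real implementation lives in `../../utils/modelsUtils` and is not part of this diff; a minimal sketch of the shape this code relies on, assuming the HF-hub layout `$(HF_HUB_CACHE)/models--<org>--<repo>/snapshots/<commit-hash>` described in the removed comment, could look like:

```ts
import { basename, dirname } from 'node:path';
import { join as joinposix } from 'node:path/posix';

// Sketch only — the real helper is defined in ../../utils/modelsUtils and takes
// a ModelInfo rather than a raw path. `mount` is the host directory to
// bind-mount; `suffix` is the model's path relative to that mount, built with
// POSIX separators since it is used inside the container.
interface HuggingFaceModelMountInfo {
  mount: string;
  suffix?: string;
}

function getHuggingFaceModelMountInfoSketch(fullPath: string): HuggingFaceModelMountInfo {
  const snapshots = dirname(fullPath); // .../models--<org>--<repo>/snapshots
  if (basename(snapshots) !== 'snapshots') {
    // not a hub-cache layout: mount the path itself at /cache with no suffix
    return { mount: fullPath };
  }
  const repoDir = dirname(snapshots); // .../models--<org>--<repo>
  return {
    mount: dirname(repoDir), // the hub cache root, to be mounted at /cache
    suffix: joinposix(basename(repoDir), 'snapshots', basename(fullPath)),
  };
}
```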
@@ -72,14 +70,9 @@ export class VLLM extends InferenceProvider {
     // something ~/.cache/huggingface/hub/models--facebook--opt-125m/snapshots
     // modelInfo.file.path
 
-    const fullPath = getLocalModelFile(modelInfo);
-
-    // modelInfo.file.path must be under the form $(HF_HUB_CACHE)/<repo-type>--<repo-id>/snapshots/<commit-hash>
-    const parent = dirname(fullPath);
-    const commitHash = basename(fullPath);
-    const name = basename(parent);
-    if (name !== 'snapshots') throw new Error('you must provide snapshot path for vllm');
-    const modelCache = dirname(parent);
+    // get model mount settings
+    const mountInfo = getHuggingFaceModelMountInfo(modelInfo);
+    const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';
 
     let connection: ContainerProviderConnection | undefined;
     if (config.connection) {
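For a concrete sense of the container-side path this produces (all values assumed for illustration, not taken from the real catalog):

```ts
// Assumed example values matching the hub-cache layout above.
const mountInfo = {
  mount: '/home/user/.cache/huggingface/hub',
  suffix: 'models--facebook--opt-125m/snapshots/27dcfa74',
};
const modelCache = mountInfo.suffix ? `/cache/${mountInfo.suffix}` : '/cache';
// modelCache === '/cache/models--facebook--opt-125m/snapshots/27dcfa74'
```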
@@ -101,12 +94,12 @@ export class VLLM extends InferenceProvider {
     // TRANSFORMERS_OFFLINE for legacy
     const envs: string[] = [`HF_HUB_CACHE=/cache`, 'TRANSFORMERS_OFFLINE=1', 'HF_HUB_OFFLINE=1'];
 
-    labels['api'] = `http://localhost:${config.port}/inference`;
+    labels['api'] = `http://localhost:${config.port}/v1`;
 
     const mounts: MountConfig = [
       {
-        Target: `/cache/${modelInfo.id}`,
-        Source: modelCache,
+        Target: `/cache`,
+        Source: mountInfo.mount,
         Type: 'bind',
       },
     ];
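The `api` label now points at vLLM's OpenAI-compatible `/v1` prefix instead of the old `/inference` path, and the whole cache directory is bind-mounted once at `/cache` (matching `HF_HUB_CACHE=/cache`) rather than per model. A quick smoke test against the relabeled endpoint, with port and model name as placeholders:

```ts
// Assumes a server on port 8000 serving a model named 'facebook/opt-125m'.
const response = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'facebook/opt-125m',
    messages: [{ role: 'user', content: 'Hello!' }],
  }),
});
console.log(await response.json());
```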
@@ -137,8 +130,8 @@ export class VLLM extends InferenceProvider {
       },
       Env: envs,
       Cmd: [
-        `--model=${joinposix('/cache', modelInfo.id, 'snapshots', commitHash)}`,
-        `--served_model_name=${modelInfo.file.file}`,
+        `--model=${modelCache}`,
+        `--served_model_name=${modelInfo.name}`,
         '--chat-template-content-format=openai',
       ],
     },
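With the assumed example values above, the container would be started with arguments equivalent to:

```ts
// Illustration only — mirrors the Cmd array built in the diff.
const cmd = [
  '--model=/cache/models--facebook--opt-125m/snapshots/27dcfa74',
  '--served_model_name=facebook/opt-125m', // assuming modelInfo.name is the catalog display name
  '--chat-template-content-format=openai',
];
```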