-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile
More file actions
41 lines (30 loc) · 1.09 KB
/
Dockerfile
File metadata and controls
41 lines (30 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Base image: NVIDIA NGC PyTorch container, release tag 22.12-py3.
FROM nvcr.io/nvidia/pytorch:22.12-py3
# RUN apt-get update && apt-get install -y git
# RUN pip install --upgrade --no-input pip
# RUN pip uninstall -y torch
# FROM wasertech/vllm-inference-api-openai:latest
# For OpenAI API Interface
# RUN pip install --no-input fschat
# RUN pip install --no-input accelerate
# RUN pip install --no-input "pydantic>2"

# Python dependencies: langchain + sentence-transformers for embeddings, and vllm.
# They are installed in a single pip invocation so the resolver picks one
# consistent set of versions (separate installs can silently override each
# other's dependencies), and with --no-cache-dir so pip's download cache is
# not baked into the image layer.
RUN pip install --no-input --no-cache-dir \
    "langchain>=0.0.340" \
    "sentence-transformers>=2.2.2" \
    "vllm>=0.2.0"
# Try wasertech/vllm@awq-sm_75
# RUN TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9;9.0" pip install --no-input git+https://github.com/wasertech/vllm.git@awq-sm_75
# Try vllm-lora
# RUN pip install --no-input 'git+https://github.com/VectorInstitute/vllm-lora'
# Everything below runs relative to /app (relative COPY destinations and the
# entrypoint's script path resolve against it).
WORKDIR /app

# Build arguments (settable with --build-arg); each defaults to the empty string.
ARG host=""
ARG port=""
ARG model_id=""
ARG tokenizer_id=""

# Persist the build arguments as environment variables in the final image.
ENV HOST="${host}" \
    PORT="${port}" \
    MODEL_ID="${model_id}" \
    TOKENIZER_ID="${tokenizer_id}"
# Copy the application code and its launcher script into the current WORKDIR,
# keeping their original file names.
COPY app.py app.sh ./

# Start the container by running the launcher script with bash (exec form).
ENTRYPOINT ["bash", "app.sh"]
# Disabled alternative: launch the vLLM API server module directly.
# ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]