-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathphi3_server_api.py
More file actions
75 lines (61 loc) · 2.52 KB
/
Copy pathphi3_server_api.py
File metadata and controls
75 lines (61 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# In phi3_server_api.py
import logging
import threading

import torch
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from transformers import pipeline, BitsAndBytesConfig
# --- Logging ---
# Configure the root logger at INFO so the start-up and per-request messages
# emitted through `logger` are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Model (built once, when this module is imported) ---
logger.info("Initializing local model... This may take a few minutes.")

model_id = "microsoft/phi-2"

# 4-bit NF4 quantization with double quantization; bfloat16 is used as the
# compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Shared text-generation pipeline used by the request handlers in this module.
# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run at load time -- confirm it is still required for this model.
llm_pipeline = pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    trust_remote_code=True,
    model_kwargs={
        "quantization_config": quantization_config,
        "torch_dtype": torch.bfloat16,
    },
)

logger.info("Local model initialized successfully and ready to serve.")

# --- API ---
app = FastAPI()
class HTMLPayload(BaseModel):
    """Request body carrying the HTML document to analyze."""

    # Raw HTML markup, inserted verbatim into the model prompt.
    html: str
# Allows only one generation at a time on the shared pipeline. The handler
# used to call the pipeline directly on the event loop, which serialized
# requests implicitly; this lock keeps that property now that the blocking
# call runs on worker threads.
_generation_lock = threading.Lock()


def _build_prompt(html: str) -> str:
    """Return the "Instruct: ... Output:" prompt that wraps *html*."""
    # The prompt is assembled by hand (no chat template) in the
    # instruct/output layout used for phi-2.
    return (
        "Instruct: You are an expert web scraper assistant. Your task is to analyze HTML and identify CSS selectors "
        "for elements that likely lead to PDF files. You must return your findings as a single, raw JSON object "
        "with one key, 'selectors', which holds a list of these selector strings. "
        "Do not include any other text, explanations, or markdown.\n"
        f"HTML:\n```html\n{html}\n```\n"
        "Output:"
    )


def _run_generation(prompt: str) -> str:
    """Run one blocking, greedy generation and return the pipeline's text."""
    with _generation_lock:
        outputs = llm_pipeline(
            prompt,
            max_new_tokens=1024,
            do_sample=False,  # greedy decoding
        )
    # NOTE(review): text-generation pipelines default to return_full_text=True,
    # so this string presumably begins with the prompt itself. It is returned
    # unchanged because existing clients may already strip it -- confirm, then
    # consider passing return_full_text=False.
    return outputs[0]["generated_text"]


@app.post("/generate_selectors")
async def generate_selectors(payload: HTMLPayload):
    """Ask the local model for CSS selectors that likely lead to PDF files.

    The blocking model call is executed on a worker thread so the event loop
    stays free to serve other requests while a generation is in progress.

    Args:
        payload: Request body whose ``html`` field holds the markup to analyze.

    Returns:
        A dict ``{"result_text": <text returned by the pipeline>}``.

    Raises:
        HTTPException: With status 503 if prompt generation fails for any
            reason; the original exception is chained as the cause.
    """
    logger.info("Received request to generate selectors.")
    try:
        generated_text = await run_in_threadpool(
            _run_generation, _build_prompt(payload.html)
        )
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("An internal error occurred: %s", e)
        raise HTTPException(
            status_code=503, detail=f"Model processing failed: {e}"
        ) from e
    logger.info("Successfully generated response.")
    return {"result_text": generated_text}