-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsmartcrawler_tool_schema.py
More file actions
46 lines (35 loc) · 1.18 KB
/
Copy pathsmartcrawler_tool_schema.py
File metadata and controls
46 lines (35 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
from pydantic import BaseModel, Field
from scrapegraph_py.logger import sgai_logger
from langchain_scrapegraph.tools import SmartCrawlerTool
# Configure the scrapegraph_py logger (imported above) at the "INFO" level
# before any tool is created or invoked.
sgai_logger.set_logging(level="INFO")
# Define the output schema
class CompanyInfo(BaseModel):
    """Schema describing the structured answer requested from the crawler.

    The class is handed to ``SmartCrawlerTool`` through its
    ``llm_output_schema`` argument. Every field is a required string and
    carries a human-readable ``description``.
    """

    # Summary of what the company does.
    company_description: str = Field(description="What the company does")
    # Text of the site's privacy policy.
    privacy_policy: str = Field(description="Privacy policy content")
    # Text of the site's terms of service.
    terms_of_service: str = Field(description="Terms of service content")
# Site to crawl and the natural-language question to answer from it.
url = "https://scrapegraphai.com/"
prompt = (
    "What does the company do? and I need text content from their privacy and terms"
)

# Crawler tool configured to return data shaped like CompanyInfo.
tool = SmartCrawlerTool(llm_output_schema=CompanyInfo)

# Gather the crawl settings in one mapping so the invoke call stays short.
crawl_request = {
    "url": url,
    "prompt": prompt,
    "cache_website": True,
    "depth": 2,
    "max_pages": 2,
    "same_domain_only": True,
}
result = tool.invoke(crawl_request)

# Pretty-print whatever the tool returned as indented JSON.
print(json.dumps(result, indent=2))

# Expected shape of the printed result, following the CompanyInfo schema:
# {
#     "company_description": "...",
#     "privacy_policy": "...",
#     "terms_of_service": "..."
# }