-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsmartscraper_js_example.py
More file actions
86 lines (64 loc) · 2.4 KB
/
smartscraper_js_example.py
File metadata and controls
86 lines (64 loc) · 2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
"""
SmartScraper Heavy JavaScript Example
This example demonstrates how to use SmartScraper with render_heavy_js enabled
for scraping JavaScript-heavy websites that require full browser rendering.
Features demonstrated:
- SmartScraper with heavy JavaScript rendering
- Basic error handling
- Environment variable configuration
- Simple API usage pattern
Requirements:
- A .env file with your SGAI_API_KEY
Example .env file:
SGAI_API_KEY=your_api_key_here
"""
import os
from dotenv import load_dotenv
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger
# Load environment variables from the .env file (the module docstring expects
# SGAI_API_KEY to be defined there) before main() reads it via os.getenv().
load_dotenv()
# Set the scrapegraph_py logger to INFO level.
# NOTE(review): presumably this makes the SDK log its request activity to the
# console - confirm against scrapegraph_py.logger.
sgai_logger.set_logging(level="INFO")
def main() -> bool:
    """Run a SmartScraper example with heavy JavaScript rendering.

    Reads the API key from the ``SGAI_API_KEY`` environment variable, sends a
    single ``smartscraper`` request with ``render_heavy_js=True`` and prints
    the request ID and, when present, the extracted result.

    Returns:
        ``True`` when the request completed without raising; ``False`` when
        the API key is missing or when any exception is raised while creating
        the client or performing the request (the error is printed, not
        re-raised).
    """
    print("🌐 SmartScraper Heavy JavaScript Example")
    print("=" * 50)

    # Initialize the client with API key from environment variable
    api_key = os.getenv("SGAI_API_KEY")
    if not api_key:
        print("❌ Error: SGAI_API_KEY environment variable not set")
        print("Please either:")
        print(" 1. Set environment variable: export SGAI_API_KEY='your-api-key-here'")
        print(" 2. Create a .env file with: SGAI_API_KEY=your-api-key-here")
        return False

    # Stays None until construction succeeds, so the cleanup in ``finally``
    # knows whether there is anything to close.
    client = None
    try:
        # Build the client inside the guarded region: a constructor failure
        # (for example a rejected API key) is then reported like any other
        # error instead of escaping as an unhandled traceback.
        client = Client(api_key=api_key)

        # Configuration
        website_url = "https://example.com"  # Replace with your target URL
        user_prompt = "Find the CEO of company X and their contact details"

        print(f"🔗 Target URL: {website_url}")
        print(f"📝 Query: {user_prompt}")
        print("🔧 Mode: Heavy JavaScript rendering enabled")

        # SmartScraper request with render_heavy_js enabled
        response = client.smartscraper(
            website_url=website_url,
            user_prompt=user_prompt,
            render_heavy_js=True,  # Enable heavy JavaScript rendering
        )

        print("\n✅ SmartScraper completed successfully!")
        print(f"📄 Request ID: {response.get('request_id', 'N/A')}")

        # Display the results
        if "result" in response:
            print("\n📝 Extracted Information:")
            print(response["result"])

        return True
    except Exception as e:
        # Top-level boundary of an example script: report and signal failure.
        print(f"❌ Error: {e}")
        return False
    finally:
        # Close the client, but only if it was actually created
        if client is not None:
            client.close()
if __name__ == "__main__":
    # Translate main()'s boolean outcome into a process exit status
    # (0 = success, 1 = failure). SystemExit is raised directly because the
    # bare ``exit`` helper is injected by the ``site`` module for interactive
    # use and is not guaranteed to exist (e.g. ``python -S``, frozen apps).
    success = main()
    raise SystemExit(0 if success else 1)