-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathhtml_mode.py
51 lines (39 loc) · 1.32 KB
/
html_mode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""
Basic example of scraping pipeline using SmartScraper
By default smart scraper converts in md format the
code. If you want to just use the original code, you have
to specify in the confi
"""
import json
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Define the configuration for the graph
# ************************************************
graph_config = {
"llm": {
"api_key": os.getenv("OPENAI_API_KEY"),
"model": "openai/gpt-4o",
},
"html_mode": True,
"verbose": True,
"headless": False,
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me what does the company do, the name and a contact email.",
source="https://scrapegraphai.com/",
config=graph_config,
)
result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))