-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathscrape_multi_format.py
More file actions
40 lines (33 loc) · 1016 Bytes
/
scrape_multi_format.py
File metadata and controls
40 lines (33 loc) · 1016 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from dotenv import load_dotenv
load_dotenv()
from scrapegraph_py import (
ScrapeGraphAI,
ScrapeRequest,
MarkdownFormatConfig,
LinksFormatConfig,
ScreenshotFormatConfig,
)
# Example: scrape one page and request several output formats in one call,
# then print a short summary of each format's result.
sgai = ScrapeGraphAI()

# One request, three formats: markdown text, extracted links, and a
# 1280x720 screenshot.
res = sgai.scrape(
    ScrapeRequest(
        url="https://example.com",
        formats=[
            MarkdownFormatConfig(),
            LinksFormatConfig(),
            ScreenshotFormatConfig(width=1280, height=720),
        ],
    )
)

if res.status == "success":
    results = res.data.results

    print("=== Markdown ===")
    # `or [""]` covers a missing "data" key, a None value, and an empty list,
    # so the [0] lookup below cannot fail when no markdown came back.
    markdown_chunks = results.get("markdown", {}).get("data") or [""]
    # Only the first 500 characters of the first chunk are shown as a preview.
    print(markdown_chunks[0][:500], "...")

    print("\n=== Links ===")
    links = results.get("links", {}).get("data", [])
    print(f"Found {len(links)} links")
    # Show at most the first five links to keep the output short.
    for link in links[:5]:
        print(f" - {link}")

    print("\n=== Screenshot ===")
    screenshot = results.get("screenshot", {}).get("data", {})
    print(f"URL: {screenshot.get('url')}")
    print(f"Size: {screenshot.get('width')}x{screenshot.get('height')}")
else:
    # On any non-success status, report the error carried by the response.
    print("Failed:", res.error)