-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathcrawl_basic_async.py
More file actions
36 lines (30 loc) · 1.25 KB
/
crawl_basic_async.py
File metadata and controls
36 lines (30 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Load environment variables from a local .env file (presumably the
# ScrapeGraphAI API key — confirm against the SDK's config docs) BEFORE
# importing the SDK, so the client can read them at import/construction time.
from dotenv import load_dotenv
load_dotenv()
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest
async def main():
    """Start an async crawl of scrapegraphai.com and poll it to completion.

    Kicks off a crawl job (at most 5 pages, depth 2), polls the job every
    2 seconds while its status is "running" (printing progress), and once
    the job reaches a terminal state ("completed" or "failed") prints each
    crawled page with its status.

    Raises:
        Whatever the scrapegraph_py SDK raises on transport failures;
        API-level failures are reported via the response's status/error
        fields and handled here by printing and returning early.
    """
    async with AsyncScrapeGraphAI() as sgai:
        start_res = await sgai.crawl.start(CrawlRequest(
            url="https://scrapegraphai.com/",
            max_pages=5,
            max_depth=2,
        ))
        if start_res.status != "success" or not start_res.data:
            print("Failed to start:", start_res.error)
            return  # nothing to poll

        crawl_id = start_res.data.id
        print("Crawl started:", crawl_id)

        status = start_res.data.status
        get_res = None  # last successful poll response, if any
        while status == "running":
            await asyncio.sleep(2)  # polling interval; avoid hammering the API
            get_res = await sgai.crawl.get(crawl_id)
            if get_res.status != "success" or not get_res.data:
                print("Failed to get status:", get_res.error)
                return
            status = get_res.data.status
            print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")

        if status in ("completed", "failed"):
            # Fix: if the job was already terminal on the start response the
            # loop never ran and get_res was unbound — fetch the final state
            # once so the page report works in that case too.
            if get_res is None:
                get_res = await sgai.crawl.get(crawl_id)
                if get_res.status != "success" or not get_res.data:
                    print("Failed to get status:", get_res.error)
                    return
            print("\nPages crawled:")
            for page in get_res.data.pages:
                print(f" {page.url} - {page.status}")
asyncio.run(main())