From a62a06b521835f15efa24c27d28cdd26eac3c87d Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 16:50:16 +0200
Subject: [PATCH 1/3] docs: add Crawl4AI guide

---
 docs/01_introduction/quick-start.mdx          |   1 +
 docs/03_guides/10_crawl4ai.mdx                | 111 ++++++++++++++++++
 .../code/crawl4ai_project/Dockerfile          |  19 +++
 .../crawl4ai_project/my_actor/__init__.py     |   0
 .../crawl4ai_project/my_actor/__main__.py     |   8 ++
 .../code/crawl4ai_project/my_actor/main.py    |  73 ++++++++++++
 .../code/crawl4ai_project/my_actor/scraper.py |  46 ++++++++
 pyproject.toml                                |   4 +
 8 files changed, 262 insertions(+)
 create mode 100644 docs/03_guides/10_crawl4ai.mdx
 create mode 100644 docs/03_guides/code/crawl4ai_project/Dockerfile
 create mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/__init__.py
 create mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/__main__.py
 create mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/main.py
 create mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/scraper.py

diff --git a/docs/01_introduction/quick-start.mdx b/docs/01_introduction/quick-start.mdx
index da166da9..6bd65f39 100644
--- a/docs/01_introduction/quick-start.mdx
+++ b/docs/01_introduction/quick-start.mdx
@@ -105,4 +105,5 @@ To see how you can integrate the Apify SDK with popular web scraping libraries,
 - [Selenium](../guides/selenium)
 - [Crawlee](../guides/crawlee)
 - [Scrapy](../guides/scrapy)
+- [Crawl4AI](../guides/crawl4ai)
 - [Running webserver](../guides/running-webserver)
diff --git a/docs/03_guides/10_crawl4ai.mdx b/docs/03_guides/10_crawl4ai.mdx
new file mode 100644
index 00000000..7dc996d8
--- /dev/null
+++ b/docs/03_guides/10_crawl4ai.mdx
@@ -0,0 +1,111 @@
+---
+id: crawl4ai
+title: Use Crawl4AI
+description: Build an Apify Actor that scrapes web pages into LLM-ready markdown using the Crawl4AI library.
+---
+
+import CodeBlock from '@theme/CodeBlock';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+import Crawl4aiMain from '!!raw-loader!./code/crawl4ai_project/my_actor/main.py';
+import Crawl4aiScraper from '!!raw-loader!./code/crawl4ai_project/my_actor/scraper.py';
+import Crawl4aiEntrypoint from '!!raw-loader!./code/crawl4ai_project/my_actor/__main__.py';
+import Crawl4aiDockerfile from '!!raw-loader!./code/crawl4ai_project/Dockerfile';
+
+In this guide, you'll learn how to use the [Crawl4AI](https://crawl4ai.com/) library in your Apify Actors.
+
+## Introduction
+
+[Crawl4AI](https://crawl4ai.com/) is an open-source, asynchronous web crawler built for LLM and AI workflows. It renders a page in a real browser and turns the result into clean, structured markdown that's ready to feed into a language model or a retrieval-augmented generation (RAG) pipeline, while still giving you the raw HTML, extracted links, and media when you need them.
+
+Some of the features that make Crawl4AI a good fit for Apify Actors:
+
+- **LLM-ready markdown** - Crawl4AI converts each page into clean markdown, stripping boilerplate and optionally filtering content, so the output can be fed straight into a language model.
+- **Real browser rendering** - Pages are loaded in a [Playwright](https://playwright.dev/)-driven browser, so JavaScript-heavy and dynamically rendered websites work out of the box.
+- **Built-in link and media extraction** - Every crawl returns the page's links already split into `internal` and `external` groups, together with the media it found, which makes recursive crawling straightforward.
+- **Flexible extraction strategies** - Beyond markdown, Crawl4AI can extract structured data with CSS/XPath schemas or with an LLM, all configured per request.
+- **First-class async support** - The `AsyncWebCrawler` is built on `asyncio`, which integrates naturally with the asyncio-based Apify SDK.
+- **Per-request proxy** - Each request can be routed through its own proxy, which pairs well with Apify Proxy and its rotating IP addresses.
+
+Crawl4AI drives a real browser through Playwright, so after installing the library you need to download the browser binaries once with the `crawl4ai-setup` command:
+
+```bash
+pip install crawl4ai
+crawl4ai-setup
+```
+
+## Example Actor
+
+The following Actor recursively crawls pages, starting from the URLs in the Actor input and following links up to a user-defined maximum depth. It uses Crawl4AI's `AsyncWebCrawler` to render each page through [Apify Proxy](https://docs.apify.com/platform/proxy), stores the page's markdown in the dataset, and follows the internal links that Crawl4AI discovers.
+
+The code is split into three small modules, following the structure of the Apify Python Actor templates:
+
+- `my_actor/main.py` - The Actor's main coroutine. It handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy) and the [request queue](https://docs.apify.com/platform/storage/request-queue), opens a single browser-backed crawler, and drives the crawl.
+- `my_actor/scraper.py` - The Crawl4AI-specific logic. A single `scrape_page` function crawls a page and returns the extracted data together with the links found on it.
+- `my_actor/__main__.py` - The entry point that runs the `main` coroutine with `asyncio`.
+
+<Tabs>
+    <TabItem value="main.py" label="my_actor/main.py">
+        <CodeBlock className="language-python">
+            {Crawl4aiMain}
+        </CodeBlock>
+    </TabItem>
+    <TabItem value="scraper.py" label="my_actor/scraper.py">
+        <CodeBlock className="language-python">
+            {Crawl4aiScraper}
+        </CodeBlock>
+    </TabItem>
+    <TabItem value="__main__.py" label="my_actor/__main__.py">
+        <CodeBlock className="language-python">
+            {Crawl4aiEntrypoint}
+        </CodeBlock>
+    </TabItem>
+</Tabs>
+
+A few things worth pointing out:
+
+- A single `AsyncWebCrawler` is opened once and reused for every request. The crawler manages one browser instance, so reusing it across the whole crawl is far cheaper than launching a new browser per page.
+- Keeping the crawling and parsing in `scrape_page` separates the Crawl4AI-specific code from the Actor's orchestration logic. The function returns the extracted data together with the discovered links, so `my_actor/main.py` decides what to store and what to enqueue.
+- `result.markdown` is the rendered page as clean markdown, and `result.metadata` carries page-level fields such as the title - exactly the kind of output you want when preparing data for an LLM.
+- `result.links` already separates `internal` (same-site) links from `external` ones, so the example follows only the internal links to keep the crawl on the same website.
+- `CacheMode.BYPASS` tells Crawl4AI to always fetch a fresh copy of the page instead of serving it from its local cache.
+
+## Using Apify Proxy
+
+Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `my_actor/main.py` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `scrape_page` for every request, which forwards it to Crawl4AI's per-request `CrawlerRunConfig`.
+
+`ProxyConfig.from_string` parses the proxy URL returned by `ProxyConfiguration.new_url` (for example `http://groups-RESIDENTIAL:<password>@proxy.apify.com:8000`) into the server, username, and password that the browser needs - the browser cannot take the credentials embedded directly in the URL. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide.
+
+## Running on the Apify platform
+
+Because Crawl4AI renders pages in a real browser, the Actor image needs a browser and its system-level dependencies. Build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser - Crawl4AI reuses those binaries, so no separate browser-install step is required in the Dockerfile.
+
+Add `apify` and `crawl4ai` to your `requirements.txt`:
+
+```text
+apify
+crawl4ai
+```
+
+<Tabs>
+    <TabItem value="Dockerfile" label="Dockerfile">
+        <CodeBlock className="language-docker">
+            {Crawl4aiDockerfile}
+        </CodeBlock>
+    </TabItem>
+</Tabs>
+
+The example pins the Python 3.13 base image because some of Crawl4AI's dependencies do not yet publish wheels for the newest Python versions, which would otherwise force a slow source build during the image build.
+
+## Conclusion
+
+In this guide, you learned how to use Crawl4AI in your Apify Actors. You can now render pages in a real browser, turn them into LLM-ready markdown, follow the links Crawl4AI discovers, route requests through Apify Proxy, and run the whole thing on the Apify platform. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
+
+## Additional resources
+
+- [Crawl4AI: Official documentation](https://docs.crawl4ai.com/)
+- [Crawl4AI: AsyncWebCrawler and configuration](https://docs.crawl4ai.com/api/async-webcrawler/)
+- [Crawl4AI: Proxy and security](https://docs.crawl4ai.com/advanced/proxy-security/)
+- [Crawl4AI: GitHub repository](https://github.com/unclecode/crawl4ai)
+- [Apify: Proxy management](https://docs.apify.com/platform/proxy)
diff --git a/docs/03_guides/code/crawl4ai_project/Dockerfile b/docs/03_guides/code/crawl4ai_project/Dockerfile
new file mode 100644
index 00000000..348f6ff2
--- /dev/null
+++ b/docs/03_guides/code/crawl4ai_project/Dockerfile
@@ -0,0 +1,19 @@
+# Use the Apify Playwright base image, which already ships a browser together
+# with all of its system-level dependencies. Crawl4AI drives this browser
+# through Playwright and reuses the binaries the image provides, so no separate
+# browser-install step is needed.
+#
+# The Python 3.13 image is used because some of Crawl4AI's dependencies do not
+# yet publish wheels for newer Python versions.
+FROM apify/actor-python-playwright:3.13-1.60.0
+
+# Copy just requirements.txt first to leverage the Docker build cache.
+COPY --chown=myuser:myuser requirements.txt ./
+RUN pip install -r requirements.txt
+
+# Copy the rest of the source code and verify that it compiles.
+COPY --chown=myuser:myuser . ./
+RUN python -m compileall -q my_actor/
+
+# Specify how to launch the Actor.
+CMD ["python", "-m", "my_actor"]
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/__init__.py b/docs/03_guides/code/crawl4ai_project/my_actor/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/__main__.py b/docs/03_guides/code/crawl4ai_project/my_actor/__main__.py
new file mode 100644
index 00000000..6aeaf3d5
--- /dev/null
+++ b/docs/03_guides/code/crawl4ai_project/my_actor/__main__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+import asyncio
+
+from .main import main
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/main.py b/docs/03_guides/code/crawl4ai_project/my_actor/main.py
new file mode 100644
index 00000000..4e6befe6
--- /dev/null
+++ b/docs/03_guides/code/crawl4ai_project/my_actor/main.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+from crawl4ai import AsyncWebCrawler, BrowserConfig
+
+from apify import Actor, Request
+
+from .scraper import scrape_page
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = actor_input.get('start_urls', [{'url': 'https://crawlee.dev'}])
+        max_depth = actor_input.get('max_depth', 1)
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Create a proxy configuration that routes requests through Apify Proxy.
+        proxy_configuration = await Actor.create_proxy_configuration()
+
+        # Open the default request queue for handling URLs to be processed.
+        request_queue = await Actor.open_request_queue()
+
+        # Enqueue the start URLs. Their crawl depth defaults to 0.
+        for start_url in start_urls:
+            url = start_url.get('url')
+            Actor.log.info(f'Enqueuing {url} ...')
+            await request_queue.add_request(Request.from_url(url))
+
+        # Configure the headless browser that Crawl4AI drives.
+        browser_config = BrowserConfig(headless=True)
+
+        # Open a single browser-backed crawler and reuse it for every request.
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            # Process the URLs from the request queue.
+            while request := await request_queue.fetch_next_request():
+                url = request.url
+
+                # Read the crawl depth tracked by the request itself.
+                depth = request.crawl_depth
+                Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+                try:
+                    # Get a fresh proxy URL for each request (None if no proxy set up).
+                    proxy_url = None
+                    if proxy_configuration:
+                        proxy_url = await proxy_configuration.new_url()
+
+                    # Crawl the page and extract its markdown and nested links.
+                    data, links = await scrape_page(crawler, url, proxy_url=proxy_url)
+
+                    # Store the extracted data to the default dataset.
+                    await Actor.push_data(data)
+
+                    # If we are not too deep yet, enqueue the links we found one
+                    # level deeper than the current page.
+                    if depth < max_depth:
+                        for link_url in links:
+                            new_request = Request.from_url(link_url)
+                            new_request.crawl_depth = depth + 1
+                            await request_queue.add_request(new_request)
+
+                except Exception:
+                    Actor.log.exception(f'Cannot extract data from {url}.')
+
+                finally:
+                    # Mark the request as handled so it is not processed again.
+                    await request_queue.mark_request_as_handled(request)
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/scraper.py b/docs/03_guides/code/crawl4ai_project/my_actor/scraper.py
new file mode 100644
index 00000000..f96f76e3
--- /dev/null
+++ b/docs/03_guides/code/crawl4ai_project/my_actor/scraper.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from crawl4ai import CacheMode, CrawlerRunConfig, ProxyConfig
+
+if TYPE_CHECKING:
+    from crawl4ai import AsyncWebCrawler
+
+
+async def scrape_page(
+    crawler: AsyncWebCrawler,
+    url: str,
+    *,
+    proxy_url: str | None = None,
+) -> tuple[dict[str, Any], list[str]]:
+    """Crawl a single page with Crawl4AI and extract its markdown and links.
+
+    The page is rendered in the browser managed by `crawler`, and Crawl4AI turns
+    the result into clean, LLM-ready markdown. Setting `proxy_config` on the
+    per-request `CrawlerRunConfig` routes this request through Apify Proxy, so
+    every page can use a fresh IP address.
+    """
+    run_config = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,
+        proxy_config=ProxyConfig.from_string(proxy_url) if proxy_url else None,
+    )
+
+    result = await crawler.arun(url, config=run_config)
+    if not result.success:
+        raise RuntimeError(result.error_message or f'Failed to crawl {url}')
+
+    # `result.markdown` is the rendered page as clean markdown, and
+    # `result.metadata` carries page-level fields such as the title.
+    data = {
+        'url': result.url,
+        'title': (result.metadata or {}).get('title'),
+        'markdown': str(result.markdown),
+    }
+
+    # Crawl4AI already splits links into `internal` (same site) and `external`.
+    # We follow only the internal ones to keep the crawl on the same website.
+    internal_links = result.links.get('internal', [])
+    links = [link['href'] for link in internal_links if link.get('href')]
+
+    return data, links
diff --git a/pyproject.toml b/pyproject.toml
index d17bdc01..38846e70 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -181,6 +181,10 @@ indent-style = "space"
     # Local imports in Scrapy project.
     "TID252", # Prefer absolute imports over relative imports from parent modules
 ]
+"**/docs/**/crawl4ai_project/**" = [
+    # Local imports are mixed up with the Apify SDK.
+    "I001", # Import block is un-sorted or un-formatted
+]
 
 [tool.ruff.lint.flake8-quotes]
 docstring-quotes = "double"

From aef18134a30dbf2159b73ab2b111406815c40c04 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 20:45:18 +0200
Subject: [PATCH 2/3] docs: renumber Crawl4AI guide to 08 and switch to a
 single-file example

---
 .../{10_crawl4ai.mdx => 08_crawl4ai.mdx}      |  57 ++------
 docs/03_guides/code/08_crawl4ai.py            | 124 ++++++++++++++++++
 .../code/crawl4ai_project/Dockerfile          |  19 ---
 .../crawl4ai_project/my_actor/__init__.py     |   0
 .../crawl4ai_project/my_actor/__main__.py     |   8 --
 .../code/crawl4ai_project/my_actor/main.py    |  73 -----------
 .../code/crawl4ai_project/my_actor/scraper.py |  46 -------
 7 files changed, 137 insertions(+), 190 deletions(-)
 rename docs/03_guides/{10_crawl4ai.mdx => 08_crawl4ai.mdx} (68%)
 create mode 100644 docs/03_guides/code/08_crawl4ai.py
 delete mode 100644 docs/03_guides/code/crawl4ai_project/Dockerfile
 delete mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/__init__.py
 delete mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/__main__.py
 delete mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/main.py
 delete mode 100644 docs/03_guides/code/crawl4ai_project/my_actor/scraper.py

diff --git a/docs/03_guides/10_crawl4ai.mdx b/docs/03_guides/08_crawl4ai.mdx
similarity index 68%
rename from docs/03_guides/10_crawl4ai.mdx
rename to docs/03_guides/08_crawl4ai.mdx
index 7dc996d8..0802c002 100644
--- a/docs/03_guides/10_crawl4ai.mdx
+++ b/docs/03_guides/08_crawl4ai.mdx
@@ -1,19 +1,14 @@
 ---
 id: crawl4ai
-title: Use Crawl4AI
+title: LLM-ready scraping with Crawl4AI
 description: Build an Apify Actor that scrapes web pages into LLM-ready markdown using the Crawl4AI library.
 ---
 
-import CodeBlock from '@theme/CodeBlock';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
 
-import Crawl4aiMain from '!!raw-loader!./code/crawl4ai_project/my_actor/main.py';
-import Crawl4aiScraper from '!!raw-loader!./code/crawl4ai_project/my_actor/scraper.py';
-import Crawl4aiEntrypoint from '!!raw-loader!./code/crawl4ai_project/my_actor/__main__.py';
-import Crawl4aiDockerfile from '!!raw-loader!./code/crawl4ai_project/Dockerfile';
+import Crawl4aiExample from '!!raw-loader!roa-loader!./code/08_crawl4ai.py';
 
-In this guide, you'll learn how to use the [Crawl4AI](https://crawl4ai.com/) library in your Apify Actors.
+In this guide, you'll learn how to use the [Crawl4AI](https://crawl4ai.com/) library for LLM-ready web scraping in your Apify Actors.
 
 ## Introduction
 
@@ -39,41 +34,23 @@ crawl4ai-setup
 
 The following Actor recursively crawls pages, starting from the URLs in the Actor input and following links up to a user-defined maximum depth. It uses Crawl4AI's `AsyncWebCrawler` to render each page through [Apify Proxy](https://docs.apify.com/platform/proxy), stores the page's markdown in the dataset, and follows the internal links that Crawl4AI discovers.
 
-The code is split into three small modules, following the structure of the Apify Python Actor templates:
-
-- `my_actor/main.py` - The Actor's main coroutine. It handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy) and the [request queue](https://docs.apify.com/platform/storage/request-queue), opens a single browser-backed crawler, and drives the crawl.
-- `my_actor/scraper.py` - The Crawl4AI-specific logic. A single `scrape_page` function crawls a page and returns the extracted data together with the links found on it.
-- `my_actor/__main__.py` - The entry point that runs the `main` coroutine with `asyncio`.
-
-<Tabs>
-    <TabItem value="main.py" label="my_actor/main.py">
-        <CodeBlock className="language-python">
-            {Crawl4aiMain}
-        </CodeBlock>
-    </TabItem>
-    <TabItem value="scraper.py" label="my_actor/scraper.py">
-        <CodeBlock className="language-python">
-            {Crawl4aiScraper}
-        </CodeBlock>
-    </TabItem>
-    <TabItem value="__main__.py" label="my_actor/__main__.py">
-        <CodeBlock className="language-python">
-            {Crawl4aiEntrypoint}
-        </CodeBlock>
-    </TabItem>
-</Tabs>
+The whole Actor fits in a single file. A `scrape_page` helper holds the Crawl4AI-specific crawling and parsing, an `enqueue_links` helper adds the discovered links to the queue one level deeper until the maximum depth is reached, and the `main` coroutine handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy) and the [request queue](https://docs.apify.com/platform/storage/request-queue), opens a single browser-backed crawler, and drives the crawl (capped at 50 requests in this example):
+
+<RunnableCodeBlock className="language-python" language="python">
+    {Crawl4aiExample}
+</RunnableCodeBlock>
 
 A few things worth pointing out:
 
 - A single `AsyncWebCrawler` is opened once and reused for every request. The crawler manages one browser instance, so reusing it across the whole crawl is far cheaper than launching a new browser per page.
-- Keeping the crawling and parsing in `scrape_page` separates the Crawl4AI-specific code from the Actor's orchestration logic. The function returns the extracted data together with the discovered links, so `my_actor/main.py` decides what to store and what to enqueue.
+- Keeping the crawling and parsing in `scrape_page` separates the Crawl4AI-specific code from the Actor's orchestration logic. The function returns the extracted data together with the discovered links, so `main` decides what to store and what to enqueue.
 - `result.markdown` is the rendered page as clean markdown, and `result.metadata` carries page-level fields such as the title - exactly the kind of output you want when preparing data for an LLM.
 - `result.links` already separates `internal` (same-site) links from `external` ones, so the example follows only the internal links to keep the crawl on the same website.
 - `CacheMode.BYPASS` tells Crawl4AI to always fetch a fresh copy of the page instead of serving it from its local cache.
 
 ## Using Apify Proxy
 
-Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `my_actor/main.py` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `scrape_page` for every request, which forwards it to Crawl4AI's per-request `CrawlerRunConfig`.
+Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `main` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `scrape_page` for every request, which forwards it to Crawl4AI's per-request `CrawlerRunConfig`.
 
 `ProxyConfig.from_string` parses the proxy URL returned by `ProxyConfiguration.new_url` (for example `http://groups-RESIDENTIAL:<password>@proxy.apify.com:8000`) into the server, username, and password that the browser needs - the browser cannot take the credentials embedded directly in the URL. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide.
 
@@ -81,6 +58,8 @@ Running on the Apify platform gives your scraper access to [Apify Proxy](https:/
 
 Because Crawl4AI renders pages in a real browser, the Actor image needs a browser and its system-level dependencies. Build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser - Crawl4AI reuses those binaries, so no separate browser-install step is required in the Dockerfile.
 
+Pin the Python 3.13 variant of that image (for example `apify/actor-python-playwright:3.13-1.60.0`). Some of Crawl4AI's dependencies do not yet publish wheels for the newest Python versions, so an image with a newer Python would force a slow source build of those packages during the image build.
+
 Add `apify` and `crawl4ai` to your `requirements.txt`:
 
 ```text
@@ -88,16 +67,6 @@ apify
 crawl4ai
 ```
 
-<Tabs>
-    <TabItem value="Dockerfile" label="Dockerfile">
-        <CodeBlock className="language-docker">
-            {Crawl4aiDockerfile}
-        </CodeBlock>
-    </TabItem>
-</Tabs>
-
-The example pins the Python 3.13 base image because some of Crawl4AI's dependencies do not yet publish wheels for the newest Python versions, which would otherwise force a slow source build during the image build.
-
 ## Conclusion
 
 In this guide, you learned how to use Crawl4AI in your Apify Actors. You can now render pages in a real browser, turn them into LLM-ready markdown, follow the links Crawl4AI discovers, route requests through Apify Proxy, and run the whole thing on the Apify platform. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/03_guides/code/08_crawl4ai.py b/docs/03_guides/code/08_crawl4ai.py
new file mode 100644
index 00000000..1c7884c1
--- /dev/null
+++ b/docs/03_guides/code/08_crawl4ai.py
@@ -0,0 +1,124 @@
+import asyncio
+from typing import Any
+
+from crawl4ai import (
+    AsyncWebCrawler,
+    BrowserConfig,
+    CacheMode,
+    CrawlerRunConfig,
+    ProxyConfig,
+)
+
+from apify import Actor, Request
+from apify.storages import RequestQueue
+
+
+async def scrape_page(
+    crawler: AsyncWebCrawler,
+    url: str,
+    *,
+    proxy_url: str | None = None,
+) -> tuple[dict[str, Any], list[str]]:
+    """Crawl a page with Crawl4AI and return its markdown and same-site links."""
+    run_config = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,
+        proxy_config=ProxyConfig.from_string(proxy_url) if proxy_url else None,
+    )
+
+    result = await crawler.arun(url, config=run_config)
+    if not result.success:
+        raise RuntimeError(result.error_message or f'Failed to crawl {url}')
+
+    data = {
+        'url': result.url,
+        'title': (result.metadata or {}).get('title'),
+        'markdown': str(result.markdown),
+    }
+
+    # Crawl4AI already classifies links; follow only the internal ones.
+    internal_links = result.links.get('internal', [])
+    links = [link['href'] for link in internal_links if link.get('href')]
+
+    return data, links
+
+
+async def enqueue_links(
+    request_queue: RequestQueue,
+    links: list[str],
+    *,
+    depth: int,
+    max_depth: int,
+) -> None:
+    """Enqueue the links one level deeper, unless max_depth was reached."""
+    if depth >= max_depth:
+        return
+
+    for link_url in links:
+        Actor.log.info(f'Enqueuing {link_url} ...')
+        request = Request.from_url(link_url)
+        request.crawl_depth = depth + 1
+        await request_queue.add_request(request)
+
+
+async def main() -> None:
+    async with Actor:
+        # Read the Actor input.
+        actor_input = await Actor.get_input() or {}
+        start_urls = actor_input.get('startUrls', [{'url': 'https://crawlee.dev'}])
+        max_depth = actor_input.get('maxDepth', 1)
+
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Set up Apify Proxy and the request queue.
+        proxy_configuration = await Actor.create_proxy_configuration()
+        request_queue = await Actor.open_request_queue()
+
+        # Enqueue the start URLs (crawl depth defaults to 0).
+        for start_url in start_urls:
+            url = start_url.get('url')
+            Actor.log.info(f'Enqueuing start URL: {url}')
+            await request_queue.add_request(Request.from_url(url))
+
+        # Limit the crawl size; increase or remove the cap to follow more pages.
+        max_requests = 50
+        handled_requests = 0
+
+        # Reuse one headless browser-backed crawler for every request.
+        browser_config = BrowserConfig(headless=True)
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            while handled_requests < max_requests and (
+                request := await request_queue.fetch_next_request()
+            ):
+                handled_requests += 1
+                url = request.url
+                depth = request.crawl_depth
+                Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+                try:
+                    # Fresh proxy URL per request (None if no proxy).
+                    proxy_url = None
+                    if proxy_configuration:
+                        proxy_url = await proxy_configuration.new_url()
+
+                    data, links = await scrape_page(crawler, url, proxy_url=proxy_url)
+                    await Actor.push_data(data)
+                    Actor.log.info(
+                        f'Stored data from {url} '
+                        f'(title={data["title"]!r}, {len(links)} links found).'
+                    )
+                    await enqueue_links(
+                        request_queue, links, depth=depth, max_depth=max_depth
+                    )
+
+                except Exception:
+                    Actor.log.exception(f'Cannot extract data from {url}.')
+
+                finally:
+                    await request_queue.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/03_guides/code/crawl4ai_project/Dockerfile b/docs/03_guides/code/crawl4ai_project/Dockerfile
deleted file mode 100644
index 348f6ff2..00000000
--- a/docs/03_guides/code/crawl4ai_project/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-# Use the Apify Playwright base image, which already ships a browser together
-# with all of its system-level dependencies. Crawl4AI drives this browser
-# through Playwright and reuses the binaries the image provides, so no separate
-# browser-install step is needed.
-#
-# The Python 3.13 image is used because some of Crawl4AI's dependencies do not
-# yet publish wheels for newer Python versions.
-FROM apify/actor-python-playwright:3.13-1.60.0
-
-# Copy just requirements.txt first to leverage the Docker build cache.
-COPY --chown=myuser:myuser requirements.txt ./
-RUN pip install -r requirements.txt
-
-# Copy the rest of the source code and verify that it compiles.
-COPY --chown=myuser:myuser . ./
-RUN python -m compileall -q my_actor/
-
-# Specify how to launch the Actor.
-CMD ["python", "-m", "my_actor"]
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/__init__.py b/docs/03_guides/code/crawl4ai_project/my_actor/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/__main__.py b/docs/03_guides/code/crawl4ai_project/my_actor/__main__.py
deleted file mode 100644
index 6aeaf3d5..00000000
--- a/docs/03_guides/code/crawl4ai_project/my_actor/__main__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-
-from .main import main
-
-if __name__ == '__main__':
-    asyncio.run(main())
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/main.py b/docs/03_guides/code/crawl4ai_project/my_actor/main.py
deleted file mode 100644
index 4e6befe6..00000000
--- a/docs/03_guides/code/crawl4ai_project/my_actor/main.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from __future__ import annotations
-
-from crawl4ai import AsyncWebCrawler, BrowserConfig
-
-from apify import Actor, Request
-
-from .scraper import scrape_page
-
-
-async def main() -> None:
-    # Enter the context of the Actor.
-    async with Actor:
-        # Retrieve the Actor input, and use default values if not provided.
-        actor_input = await Actor.get_input() or {}
-        start_urls = actor_input.get('start_urls', [{'url': 'https://crawlee.dev'}])
-        max_depth = actor_input.get('max_depth', 1)
-
-        # Exit if no start URLs are provided.
-        if not start_urls:
-            Actor.log.info('No start URLs specified in Actor input, exiting...')
-            await Actor.exit()
-
-        # Create a proxy configuration that routes requests through Apify Proxy.
-        proxy_configuration = await Actor.create_proxy_configuration()
-
-        # Open the default request queue for handling URLs to be processed.
-        request_queue = await Actor.open_request_queue()
-
-        # Enqueue the start URLs. Their crawl depth defaults to 0.
-        for start_url in start_urls:
-            url = start_url.get('url')
-            Actor.log.info(f'Enqueuing {url} ...')
-            await request_queue.add_request(Request.from_url(url))
-
-        # Configure the headless browser that Crawl4AI drives.
-        browser_config = BrowserConfig(headless=True)
-
-        # Open a single browser-backed crawler and reuse it for every request.
-        async with AsyncWebCrawler(config=browser_config) as crawler:
-            # Process the URLs from the request queue.
-            while request := await request_queue.fetch_next_request():
-                url = request.url
-
-                # Read the crawl depth tracked by the request itself.
-                depth = request.crawl_depth
-                Actor.log.info(f'Scraping {url} (depth={depth}) ...')
-
-                try:
-                    # Get a fresh proxy URL for each request (None if no proxy set up).
-                    proxy_url = None
-                    if proxy_configuration:
-                        proxy_url = await proxy_configuration.new_url()
-
-                    # Crawl the page and extract its markdown and nested links.
-                    data, links = await scrape_page(crawler, url, proxy_url=proxy_url)
-
-                    # Store the extracted data to the default dataset.
-                    await Actor.push_data(data)
-
-                    # If we are not too deep yet, enqueue the links we found one
-                    # level deeper than the current page.
-                    if depth < max_depth:
-                        for link_url in links:
-                            new_request = Request.from_url(link_url)
-                            new_request.crawl_depth = depth + 1
-                            await request_queue.add_request(new_request)
-
-                except Exception:
-                    Actor.log.exception(f'Cannot extract data from {url}.')
-
-                finally:
-                    # Mark the request as handled so it is not processed again.
-                    await request_queue.mark_request_as_handled(request)
diff --git a/docs/03_guides/code/crawl4ai_project/my_actor/scraper.py b/docs/03_guides/code/crawl4ai_project/my_actor/scraper.py
deleted file mode 100644
index f96f76e3..00000000
--- a/docs/03_guides/code/crawl4ai_project/my_actor/scraper.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any
-
-from crawl4ai import CacheMode, CrawlerRunConfig, ProxyConfig
-
-if TYPE_CHECKING:
-    from crawl4ai import AsyncWebCrawler
-
-
-async def scrape_page(
-    crawler: AsyncWebCrawler,
-    url: str,
-    *,
-    proxy_url: str | None = None,
-) -> tuple[dict[str, Any], list[str]]:
-    """Crawl a single page with Crawl4AI and extract its markdown and links.
-
-    The page is rendered in the browser managed by `crawler`, and Crawl4AI turns
-    the result into clean, LLM-ready markdown. Setting `proxy_config` on the
-    per-request `CrawlerRunConfig` routes this request through Apify Proxy, so
-    every page can use a fresh IP address.
-    """
-    run_config = CrawlerRunConfig(
-        cache_mode=CacheMode.BYPASS,
-        proxy_config=ProxyConfig.from_string(proxy_url) if proxy_url else None,
-    )
-
-    result = await crawler.arun(url, config=run_config)
-    if not result.success:
-        raise RuntimeError(result.error_message or f'Failed to crawl {url}')
-
-    # `result.markdown` is the rendered page as clean markdown, and
-    # `result.metadata` carries page-level fields such as the title.
-    data = {
-        'url': result.url,
-        'title': (result.metadata or {}).get('title'),
-        'markdown': str(result.markdown),
-    }
-
-    # Crawl4AI already splits links into `internal` (same site) and `external`.
-    # We follow only the internal ones to keep the crawl on the same website.
-    internal_links = result.links.get('internal', [])
-    links = [link['href'] for link in internal_links if link.get('href')]
-
-    return data, links

From d6d4dcd1811db37e9a532aaa58625a0db741986e Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 21:03:05 +0200
Subject: [PATCH 3/3] chore: drop unused ruff ignore for the removed Crawl4AI
 project

---
 pyproject.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 38846e70..d17bdc01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -181,10 +181,6 @@ indent-style = "space"
     # Local imports in Scrapy project.
     "TID252", # Prefer absolute imports over relative imports from parent modules
 ]
-"**/docs/**/crawl4ai_project/**" = [
-    # Local imports are mixed up with the Apify SDK.
-    "I001", # Import block is un-sorted or un-formatted
-]
 
 [tool.ruff.lint.flake8-quotes]
 docstring-quotes = "double"