From 54e153d74860c271eb31142644ad088a0b4f6569 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 10:40:25 +0200
Subject: [PATCH 1/5] docs: Add Scrapling guide

---
 docs/01_introduction/quick-start.mdx |   1 +
 docs/03_guides/09_scrapling.mdx      | 123 +++++++++++++++++++++++++++
 docs/03_guides/code/09_scrapling.py  |  95 +++++++++++++++++++++
 3 files changed, 219 insertions(+)
 create mode 100644 docs/03_guides/09_scrapling.mdx
 create mode 100644 docs/03_guides/code/09_scrapling.py

diff --git a/docs/01_introduction/quick-start.mdx b/docs/01_introduction/quick-start.mdx
index da166da9..c0f8bec3 100644
--- a/docs/01_introduction/quick-start.mdx
+++ b/docs/01_introduction/quick-start.mdx
@@ -105,4 +105,5 @@ To see how you can integrate the Apify SDK with popular web scraping libraries,
 - [Selenium](../guides/selenium)
 - [Crawlee](../guides/crawlee)
 - [Scrapy](../guides/scrapy)
+- [Scrapling](../guides/scrapling)
 - [Running webserver](../guides/running-webserver)
diff --git a/docs/03_guides/09_scrapling.mdx b/docs/03_guides/09_scrapling.mdx
new file mode 100644
index 00000000..459e5a25
--- /dev/null
+++ b/docs/03_guides/09_scrapling.mdx
@@ -0,0 +1,123 @@
+---
+id: scrapling
+title: Use Scrapling
+description: Build an Apify Actor that scrapes web pages using the Scrapling adaptive web scraping library.
+---
+
+import CodeBlock from '@theme/CodeBlock';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
+
+import ScraplingExample from '!!raw-loader!roa-loader!./code/09_scrapling.py';
+
+In this guide, you'll learn how to use the [Scrapling](https://scrapling.readthedocs.io/) library in your Apify Actors.
+
+## Introduction
+
+[Scrapling](https://scrapling.readthedocs.io/) is an adaptive web scraping library for Python that combines fetching and parsing behind a single, high-level API. It can fetch a page with fast HTTP requests or with a real browser, parse the result with familiar CSS selectors and XPath, and even find your target elements again automatically when a website's structure changes.
+
+Some of the features that make Scrapling a good fit for Apify Actors:
+
+- **Multiple fetchers** - A single API exposes a fast HTTP client with browser TLS-fingerprint impersonation, as well as full browser automation for JavaScript-heavy or protected pages.
+- **Adaptive selectors** - Scrapling can remember the elements you scraped and find them again after a website redesign, so your scrapers keep working with fewer manual fixes.
+- **Anti-bot evasion** - Built-in stealth features (browser impersonation, realistic headers, and automatic Cloudflare Turnstile solving with `StealthyFetcher`) help you avoid being blocked.
+- **Familiar parsing API** - Elements are selected with CSS selectors (including the `::text` and `::attr()` pseudo-elements) or XPath, with a Scrapy/Parsel-like `.get()` and `.getall()` interface.
+- **First-class async support** - Every fetcher has an asynchronous variant, which integrates naturally with the asyncio-based Apify SDK.
+
+Scrapling's parser works on its own, while the fetchers are an optional extra. Install Scrapling with the `fetchers` extra to get the HTTP and browser fetchers:
+
+```bash
+pip install "scrapling[fetchers]"
+```
+
+## Choosing a fetcher
+
+All of Scrapling's fetchers are importable from `scrapling.fetchers`. Pick the one that matches the website you're scraping:
+
+- **`Fetcher` / `AsyncFetcher`** - Plain HTTP requests via `.get()`, `.post()`, `.put()`, and `.delete()`. Fast and lightweight, with optional browser TLS-fingerprint impersonation (`impersonate`) and realistic headers (`stealthy_headers`). This is the best choice for static pages and APIs, and it needs no browser binaries.
+- **`DynamicFetcher` / `DynamicSession`** - Full browser automation based on [Playwright](https://playwright.dev/), for pages that require JavaScript rendering or interaction. Fetch a page with `.fetch()` or its async variant `.async_fetch()`.
+- **`StealthyFetcher` / `StealthySession`** - A stealth-hardened browser fetcher that can automatically solve Cloudflare Turnstile challenges (`solve_cloudflare=True`). Use it for the most heavily protected websites.
+
+The returned `Response` object is also a Scrapling selector, so you can call `.css()`, `.xpath()`, `.find_all()`, and the other parsing methods on it directly.
+
+The HTTP fetchers work with just the `scrapling[fetchers]` extra. The browser-based fetchers (`DynamicFetcher` and `StealthyFetcher`) additionally need browser binaries, which you download with the `scrapling install` command - see [Running browser-based fetchers](#running-browser-based-fetchers) below.
+
+The example Actor in this guide uses the HTTP `AsyncFetcher`, which is the simplest to deploy and pairs well with Apify Proxy.
+
+## Example Actor
+
+The following Actor recursively scrapes titles from all linked pages, up to a user-defined maximum depth, starting from the URLs in the Actor input. It uses Scrapling's `AsyncFetcher` to fetch each page through [Apify Proxy](https://docs.apify.com/platform/proxy), and CSS selectors to extract the title, headings, and links.
+
+<RunnableCodeBlock className="language-python" language="python">
+    {ScraplingExample}
+</RunnableCodeBlock>
+
+A few things worth pointing out:
+
+- The response of `AsyncFetcher.get` is a Scrapling selector, so `response.css('title::text').get()` reads the page title and `response.css('a::attr(href)').getall()` returns every link's `href` in one call.
+- `response.urljoin(link_href)` resolves relative links against the page URL, so you can enqueue them directly.
+- The `impersonate='chrome'` and `stealthy_headers=True` options make the request look like it comes from a real Chrome browser, which - combined with Apify Proxy - reduces the chance of being blocked.
+
+## Using Apify Proxy
+
+Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. The example above creates a proxy configuration and passes a fresh proxy URL to every request:
+
+```python
+proxy_configuration = await Actor.create_proxy_configuration()
+...
+proxy_url = None
+if proxy_configuration:
+    proxy_url = await proxy_configuration.new_url()
+
+response = await AsyncFetcher.get(url, proxy=proxy_url)
+```
+
+Scrapling accepts the proxy as a URL string (for example `http://user:pass@proxy.apify.com:8000`), which is exactly what `ProxyConfiguration.new_url` returns. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide. The browser-based fetchers accept the same `proxy` argument.
+
+## Running browser-based fetchers
+
+`DynamicFetcher` and `StealthyFetcher` drive a real browser, so they need the browser binaries installed with the `scrapling install` command. Locally, run it once after installing the `scrapling[fetchers]` extra:
+
+```bash
+scrapling install
+```
+
+On the Apify platform, the Actor runs in a Docker container, so the browsers have to be installed during the image build. Build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies, and then download the browser binaries that Scrapling expects:
+
+<CodeBlock className="language-docker" title="Dockerfile">
+{`FROM apify/actor-python-playwright:3.14-1.60.0
+
+COPY --chown=myuser:myuser requirements.txt ./
+RUN pip install -r requirements.txt
+
+# Download the browser binaries Scrapling needs. The base image already provides
+# their system-level dependencies, so run this step as root.
+USER root
+RUN scrapling install
+USER myuser
+
+COPY --chown=myuser:myuser . ./
+RUN python -m compileall -q my_actor/
+
+CMD ["python", "-m", "my_actor"]`}
+</CodeBlock>
+
+Fetching a page then only differs in which fetcher you call - the parsing API is identical:
+
+```python
+from scrapling.fetchers import DynamicFetcher
+
+response = await DynamicFetcher.async_fetch(url, headless=True, network_idle=True)
+quotes = response.css('.quote .text::text').getall()
+```
+
+## Conclusion
+
+In this guide, you learned how to use Scrapling in your Apify Actors. You can now fetch pages with Scrapling's HTTP or browser-based fetchers, extract data with its CSS and XPath selectors, route requests through Apify Proxy, and run the whole thing on the Apify platform. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own scraping tasks. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
+
+## Additional resources
+
+- [Scrapling: Official documentation](https://scrapling.readthedocs.io/)
+- [Scrapling: Fetchers](https://scrapling.readthedocs.io/en/latest/fetching/choosing/)
+- [Scrapling: Parsing and selecting elements](https://scrapling.readthedocs.io/en/latest/parsing/selection/)
+- [Scrapling: GitHub repository](https://github.com/D4Vinci/Scrapling)
+- [Apify: Proxy documentation](https://docs.apify.com/platform/proxy)
diff --git a/docs/03_guides/code/09_scrapling.py b/docs/03_guides/code/09_scrapling.py
new file mode 100644
index 00000000..fed1b5ae
--- /dev/null
+++ b/docs/03_guides/code/09_scrapling.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from scrapling.fetchers import AsyncFetcher
+
+from apify import Actor, Request
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = actor_input.get('start_urls', [{'url': 'https://crawlee.dev'}])
+        max_depth = actor_input.get('max_depth', 1)
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Create a proxy configuration that routes requests through Apify Proxy.
+        proxy_configuration = await Actor.create_proxy_configuration()
+
+        # Open the default request queue for handling URLs to be processed.
+        request_queue = await Actor.open_request_queue()
+
+        # Enqueue the start URLs with an initial crawl depth of 0.
+        for start_url in start_urls:
+            url = start_url.get('url')
+            Actor.log.info(f'Enqueuing {url} ...')
+            new_request = Request.from_url(url, user_data={'depth': 0})
+            await request_queue.add_request(new_request)
+
+        # Process the URLs from the request queue.
+        while request := await request_queue.fetch_next_request():
+            url = request.url
+
+            if not isinstance(request.user_data['depth'], (str, int)):
+                raise TypeError('Request.depth is an unexpected type.')
+
+            depth = int(request.user_data['depth'])
+            Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+            try:
+                # Get a fresh proxy URL for each request (None if no proxy is set up).
+                proxy_url = None
+                if proxy_configuration:
+                    proxy_url = await proxy_configuration.new_url()
+
+                # Fetch the page with Scrapling's asynchronous HTTP fetcher. The
+                # `impersonate` and `stealthy_headers` options make the request look
+                # like it comes from a real Chrome browser, reducing the chance of
+                # being blocked. The returned response is also a Scrapling selector.
+                response = await AsyncFetcher.get(
+                    url,
+                    proxy=proxy_url,
+                    impersonate='chrome',
+                    stealthy_headers=True,
+                    timeout=60,
+                )
+
+                # If the current depth is less than max_depth, find nested links
+                # and enqueue them. The `::attr(href)` pseudo-selector reads the
+                # attribute, and `response.urljoin` resolves it against the page URL.
+                if depth < max_depth:
+                    for link_href in response.css('a::attr(href)').getall():
+                        link_url = response.urljoin(link_href)
+
+                        if link_url.startswith(('http://', 'https://')):
+                            Actor.log.info(f'Enqueuing {link_url} ...')
+                            new_request = Request.from_url(
+                                link_url,
+                                user_data={'depth': depth + 1},
+                            )
+                            await request_queue.add_request(new_request)
+
+                # Extract the desired data using Scrapling's CSS selectors. The
+                # `::text` pseudo-element returns the text content of the elements.
+                data = {
+                    'url': url,
+                    'title': response.css('title::text').get(),
+                    'h1s': response.css('h1::text').getall(),
+                    'h2s': response.css('h2::text').getall(),
+                    'h3s': response.css('h3::text').getall(),
+                }
+
+                # Store the extracted data to the default dataset.
+                await Actor.push_data(data)
+
+            except Exception:
+                Actor.log.exception(f'Cannot extract data from {url}.')
+
+            finally:
+                # Mark the request as handled to ensure it is not processed again.
+                await request_queue.mark_request_as_handled(request)

From 29c4c8a8a35a83410f998ad8f6d6efea0f9decbf Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 11:24:10 +0200
Subject: [PATCH 2/5] docs: Split Scrapling guide example into modules and use
 code tabs

---
 docs/03_guides/09_scrapling.mdx               | 93 +++++++++---------
 docs/03_guides/code/09_scrapling.py           | 95 -------------------
 .../code/scrapling_browser_project/Dockerfile | 21 ++++
 .../my_actor/scraper.py                       | 45 +++++++++
 .../scrapling_project/my_actor/__init__.py    |  0
 .../scrapling_project/my_actor/__main__.py    |  8 ++
 .../code/scrapling_project/my_actor/main.py   | 71 ++++++++++++++
 .../scrapling_project/my_actor/scraper.py     | 47 +++++++++
 pyproject.toml                                |  4 +
 9 files changed, 245 insertions(+), 139 deletions(-)
 delete mode 100644 docs/03_guides/code/09_scrapling.py
 create mode 100644 docs/03_guides/code/scrapling_browser_project/Dockerfile
 create mode 100644 docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py
 create mode 100644 docs/03_guides/code/scrapling_project/my_actor/__init__.py
 create mode 100644 docs/03_guides/code/scrapling_project/my_actor/__main__.py
 create mode 100644 docs/03_guides/code/scrapling_project/my_actor/main.py
 create mode 100644 docs/03_guides/code/scrapling_project/my_actor/scraper.py

diff --git a/docs/03_guides/09_scrapling.mdx b/docs/03_guides/09_scrapling.mdx
index 459e5a25..3e76ebca 100644
--- a/docs/03_guides/09_scrapling.mdx
+++ b/docs/03_guides/09_scrapling.mdx
@@ -5,9 +5,14 @@ description: Build an Apify Actor that scrapes web pages using the Scrapling ada
 ---
 
 import CodeBlock from '@theme/CodeBlock';
-import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 
-import ScraplingExample from '!!raw-loader!roa-loader!./code/09_scrapling.py';
+import ScraplingMain from '!!raw-loader!./code/scrapling_project/my_actor/main.py';
+import ScraplingScraper from '!!raw-loader!./code/scrapling_project/my_actor/scraper.py';
+import ScraplingEntrypoint from '!!raw-loader!./code/scrapling_project/my_actor/__main__.py';
+import ScraplingBrowserScraper from '!!raw-loader!./code/scrapling_browser_project/my_actor/scraper.py';
+import ScraplingBrowserDockerfile from '!!raw-loader!./code/scrapling_browser_project/Dockerfile';
 
 In this guide, you'll learn how to use the [Scrapling](https://scrapling.readthedocs.io/) library in your Apify Actors.
 
@@ -47,29 +52,40 @@ The example Actor in this guide uses the HTTP `AsyncFetcher`, which is the simpl
 
 The following Actor recursively scrapes titles from all linked pages, up to a user-defined maximum depth, starting from the URLs in the Actor input. It uses Scrapling's `AsyncFetcher` to fetch each page through [Apify Proxy](https://docs.apify.com/platform/proxy), and CSS selectors to extract the title, headings, and links.
 
-<RunnableCodeBlock className="language-python" language="python">
-    {ScraplingExample}
-</RunnableCodeBlock>
+The code is split into three small modules, following the structure of the Apify Python Actor templates:
+
+- `my_actor/main.py` - The Actor's main coroutine. It handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy) and the [request queue](https://docs.apify.com/platform/storage/request-queue), and drives the crawl.
+- `my_actor/scraper.py` - The Scrapling-specific logic. A single `scrape_page` function fetches a page and returns the extracted data together with the links found on it.
+- `my_actor/__main__.py` - The entry point that runs the `main` coroutine with `asyncio`.
+
+<Tabs>
+    <TabItem value="main.py" label="my_actor/main.py">
+        <CodeBlock className="language-python">
+            {ScraplingMain}
+        </CodeBlock>
+    </TabItem>
+    <TabItem value="scraper.py" label="my_actor/scraper.py">
+        <CodeBlock className="language-python">
+            {ScraplingScraper}
+        </CodeBlock>
+    </TabItem>
+    <TabItem value="__main__.py" label="my_actor/__main__.py">
+        <CodeBlock className="language-python">
+            {ScraplingEntrypoint}
+        </CodeBlock>
+    </TabItem>
+</Tabs>
 
 A few things worth pointing out:
 
+- Keeping the fetching and parsing in `scrape_page` separates the Scrapling-specific code from the Actor's orchestration logic. The function returns the extracted data together with the discovered links, so `my_actor/main.py` decides what to store and what to enqueue.
 - The response of `AsyncFetcher.get` is a Scrapling selector, so `response.css('title::text').get()` reads the page title and `response.css('a::attr(href)').getall()` returns every link's `href` in one call.
 - `response.urljoin(link_href)` resolves relative links against the page URL, so you can enqueue them directly.
 - The `impersonate='chrome'` and `stealthy_headers=True` options make the request look like it comes from a real Chrome browser, which - combined with Apify Proxy - reduces the chance of being blocked.
 
 ## Using Apify Proxy
 
-Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. The example above creates a proxy configuration and passes a fresh proxy URL to every request:
-
-```python
-proxy_configuration = await Actor.create_proxy_configuration()
-...
-proxy_url = None
-if proxy_configuration:
-    proxy_url = await proxy_configuration.new_url()
-
-response = await AsyncFetcher.get(url, proxy=proxy_url)
-```
+Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `my_actor/main.py` creates a proxy configuration with `Actor.create_proxy_configuration` and requests a fresh proxy URL for every request. It passes that URL to `scrape_page`, which forwards it to Scrapling's `proxy` argument.
 
 Scrapling accepts the proxy as a URL string (for example `http://user:pass@proxy.apify.com:8000`), which is exactly what `ProxyConfiguration.new_url` returns. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide. The browser-based fetchers accept the same `proxy` argument.
 
@@ -81,34 +97,23 @@ Scrapling accepts the proxy as a URL string (for example `http://user:pass@proxy
 scrapling install
 ```
 
-On the Apify platform, the Actor runs in a Docker container, so the browsers have to be installed during the image build. Build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies, and then download the browser binaries that Scrapling expects:
-
-<CodeBlock className="language-docker" title="Dockerfile">
-{`FROM apify/actor-python-playwright:3.14-1.60.0
-
-COPY --chown=myuser:myuser requirements.txt ./
-RUN pip install -r requirements.txt
-
-# Download the browser binaries Scrapling needs. The base image already provides
-# their system-level dependencies, so run this step as root.
-USER root
-RUN scrapling install
-USER myuser
-
-COPY --chown=myuser:myuser . ./
-RUN python -m compileall -q my_actor/
-
-CMD ["python", "-m", "my_actor"]`}
-</CodeBlock>
-
-Fetching a page then only differs in which fetcher you call - the parsing API is identical:
-
-```python
-from scrapling.fetchers import DynamicFetcher
-
-response = await DynamicFetcher.async_fetch(url, headless=True, network_idle=True)
-quotes = response.css('.quote .text::text').getall()
-```
+Switching the example Actor from HTTP to a real browser only takes two changes - the rest of the project, including `my_actor/main.py`, stays exactly the same:
+
+1. Swap the fetcher call in `my_actor/scraper.py` for `DynamicFetcher.async_fetch`. The parsing API is identical, so the data extraction is unchanged.
+2. Build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies, and run `scrapling install` during the build to download the browser binaries that Scrapling expects.
+
+<Tabs>
+    <TabItem value="scraper.py" label="my_actor/scraper.py">
+        <CodeBlock className="language-python">
+            {ScraplingBrowserScraper}
+        </CodeBlock>
+    </TabItem>
+    <TabItem value="Dockerfile" label="Dockerfile">
+        <CodeBlock className="language-docker">
+            {ScraplingBrowserDockerfile}
+        </CodeBlock>
+    </TabItem>
+</Tabs>
 
 ## Conclusion
 
diff --git a/docs/03_guides/code/09_scrapling.py b/docs/03_guides/code/09_scrapling.py
deleted file mode 100644
index fed1b5ae..00000000
--- a/docs/03_guides/code/09_scrapling.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from __future__ import annotations
-
-from scrapling.fetchers import AsyncFetcher
-
-from apify import Actor, Request
-
-
-async def main() -> None:
-    # Enter the context of the Actor.
-    async with Actor:
-        # Retrieve the Actor input, and use default values if not provided.
-        actor_input = await Actor.get_input() or {}
-        start_urls = actor_input.get('start_urls', [{'url': 'https://crawlee.dev'}])
-        max_depth = actor_input.get('max_depth', 1)
-
-        # Exit if no start URLs are provided.
-        if not start_urls:
-            Actor.log.info('No start URLs specified in Actor input, exiting...')
-            await Actor.exit()
-
-        # Create a proxy configuration that routes requests through Apify Proxy.
-        proxy_configuration = await Actor.create_proxy_configuration()
-
-        # Open the default request queue for handling URLs to be processed.
-        request_queue = await Actor.open_request_queue()
-
-        # Enqueue the start URLs with an initial crawl depth of 0.
-        for start_url in start_urls:
-            url = start_url.get('url')
-            Actor.log.info(f'Enqueuing {url} ...')
-            new_request = Request.from_url(url, user_data={'depth': 0})
-            await request_queue.add_request(new_request)
-
-        # Process the URLs from the request queue.
-        while request := await request_queue.fetch_next_request():
-            url = request.url
-
-            if not isinstance(request.user_data['depth'], (str, int)):
-                raise TypeError('Request.depth is an unexpected type.')
-
-            depth = int(request.user_data['depth'])
-            Actor.log.info(f'Scraping {url} (depth={depth}) ...')
-
-            try:
-                # Get a fresh proxy URL for each request (None if no proxy is set up).
-                proxy_url = None
-                if proxy_configuration:
-                    proxy_url = await proxy_configuration.new_url()
-
-                # Fetch the page with Scrapling's asynchronous HTTP fetcher. The
-                # `impersonate` and `stealthy_headers` options make the request look
-                # like it comes from a real Chrome browser, reducing the chance of
-                # being blocked. The returned response is also a Scrapling selector.
-                response = await AsyncFetcher.get(
-                    url,
-                    proxy=proxy_url,
-                    impersonate='chrome',
-                    stealthy_headers=True,
-                    timeout=60,
-                )
-
-                # If the current depth is less than max_depth, find nested links
-                # and enqueue them. The `::attr(href)` pseudo-selector reads the
-                # attribute, and `response.urljoin` resolves it against the page URL.
-                if depth < max_depth:
-                    for link_href in response.css('a::attr(href)').getall():
-                        link_url = response.urljoin(link_href)
-
-                        if link_url.startswith(('http://', 'https://')):
-                            Actor.log.info(f'Enqueuing {link_url} ...')
-                            new_request = Request.from_url(
-                                link_url,
-                                user_data={'depth': depth + 1},
-                            )
-                            await request_queue.add_request(new_request)
-
-                # Extract the desired data using Scrapling's CSS selectors. The
-                # `::text` pseudo-element returns the text content of the elements.
-                data = {
-                    'url': url,
-                    'title': response.css('title::text').get(),
-                    'h1s': response.css('h1::text').getall(),
-                    'h2s': response.css('h2::text').getall(),
-                    'h3s': response.css('h3::text').getall(),
-                }
-
-                # Store the extracted data to the default dataset.
-                await Actor.push_data(data)
-
-            except Exception:
-                Actor.log.exception(f'Cannot extract data from {url}.')
-
-            finally:
-                # Mark the request as handled to ensure it is not processed again.
-                await request_queue.mark_request_as_handled(request)
diff --git a/docs/03_guides/code/scrapling_browser_project/Dockerfile b/docs/03_guides/code/scrapling_browser_project/Dockerfile
new file mode 100644
index 00000000..38b30c60
--- /dev/null
+++ b/docs/03_guides/code/scrapling_browser_project/Dockerfile
@@ -0,0 +1,21 @@
+# Use the Apify Playwright base image, which already ships a browser together
+# with all of its system-level dependencies.
+FROM apify/actor-python-playwright:3.14-1.60.0
+
+# Copy just requirements.txt first to leverage the Docker build cache.
+COPY --chown=myuser:myuser requirements.txt ./
+RUN pip install -r requirements.txt
+
+# Download the browser binaries that Scrapling expects. The base image already
+# provides their system-level dependencies, so run this step as root and then
+# switch back to the unprivileged user.
+USER root
+RUN scrapling install
+USER myuser
+
+# Copy the rest of the source code and verify that it compiles.
+COPY --chown=myuser:myuser . ./
+RUN python -m compileall -q my_actor/
+
+# Specify how to launch the Actor.
+CMD ["python", "-m", "my_actor"]
diff --git a/docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py b/docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py
new file mode 100644
index 00000000..fb7d4579
--- /dev/null
+++ b/docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from typing import Any
+
+from scrapling.fetchers import DynamicFetcher
+
+
+async def scrape_page(
+    url: str,
+    *,
+    proxy_url: str | None = None,
+) -> tuple[dict[str, Any], list[str]]:
+    """Fetch a single page in a real browser and return `(data, links)` for it.
+
+    `DynamicFetcher` drives a real browser via Playwright, so it can render
+    JavaScript-heavy pages. `network_idle` waits until the page stops making
+    network requests before the HTML is captured. Apart from the fetcher call,
+    everything else - including the parsing - is identical to the HTTP version.
+    """
+    response = await DynamicFetcher.async_fetch(
+        url,
+        proxy=proxy_url,
+        headless=True,
+        network_idle=True,
+    )
+
+    # Extract the desired data using CSS selectors. The `::text` pseudo-element
+    # returns the text content of the matched elements.
+    data = {
+        'url': url,
+        'title': response.css('title::text').get(),
+        'h1s': response.css('h1::text').getall(),
+        'h2s': response.css('h2::text').getall(),
+        'h3s': response.css('h3::text').getall(),
+    }
+
+    # Keep only absolute http(s) links. The `::attr(href)` pseudo-element reads
+    # the attribute and `response.urljoin` resolves it against the page URL.
+    links: list[str] = []
+    for href in response.css('a::attr(href)').getall():
+        link_url = response.urljoin(href)
+        if link_url.startswith(('http://', 'https://')):
+            links.append(link_url)
+
+    return data, links
diff --git a/docs/03_guides/code/scrapling_project/my_actor/__init__.py b/docs/03_guides/code/scrapling_project/my_actor/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/docs/03_guides/code/scrapling_project/my_actor/__main__.py b/docs/03_guides/code/scrapling_project/my_actor/__main__.py
new file mode 100644
index 00000000..6aeaf3d5
--- /dev/null
+++ b/docs/03_guides/code/scrapling_project/my_actor/__main__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+import asyncio
+
+from .main import main
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/03_guides/code/scrapling_project/my_actor/main.py b/docs/03_guides/code/scrapling_project/my_actor/main.py
new file mode 100644
index 00000000..d2cd36e7
--- /dev/null
+++ b/docs/03_guides/code/scrapling_project/my_actor/main.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from apify import Actor, Request
+
+from .scraper import scrape_page
+
+
+async def main() -> None:
+    # Enter the context of the Actor.
+    async with Actor:
+        # Retrieve the Actor input, and use default values if not provided.
+        actor_input = await Actor.get_input() or {}
+        start_urls = actor_input.get('start_urls', [{'url': 'https://crawlee.dev'}])
+        max_depth = actor_input.get('max_depth', 1)
+
+        # Exit if no start URLs are provided.
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Create a proxy configuration that routes requests through Apify Proxy.
+        proxy_configuration = await Actor.create_proxy_configuration()
+
+        # Open the default request queue for handling URLs to be processed.
+        request_queue = await Actor.open_request_queue()
+
+        # Enqueue the start URLs with an initial crawl depth of 0.
+        for start_url in start_urls:
+            url = start_url.get('url')
+            Actor.log.info(f'Enqueuing {url} ...')
+            request = Request.from_url(url, user_data={'depth': 0})
+            await request_queue.add_request(request)
+
+        # Process the URLs from the request queue.
+        while request := await request_queue.fetch_next_request():
+            url = request.url
+
+            if not isinstance(request.user_data['depth'], (str, int)):
+                raise TypeError('Request.depth is an unexpected type.')
+
+            depth = int(request.user_data['depth'])
+            Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+            try:
+                # Get a fresh proxy URL for each request (None if no proxy set up).
+                proxy_url = None
+                if proxy_configuration:
+                    proxy_url = await proxy_configuration.new_url()
+
+                # Fetch the page and extract its data and nested links.
+                data, links = await scrape_page(url, proxy_url=proxy_url)
+
+                # Store the extracted data to the default dataset.
+                await Actor.push_data(data)
+
+                # If we are not too deep yet, enqueue the links we found.
+                if depth < max_depth:
+                    for link_url in links:
+                        Actor.log.info(f'Enqueuing {link_url} ...')
+                        new_request = Request.from_url(
+                            link_url,
+                            user_data={'depth': depth + 1},
+                        )
+                        await request_queue.add_request(new_request)
+
+            except Exception:
+                Actor.log.exception(f'Cannot extract data from {url}.')
+
+            finally:
+                # Mark the request as handled so it is not processed again.
+                await request_queue.mark_request_as_handled(request)
diff --git a/docs/03_guides/code/scrapling_project/my_actor/scraper.py b/docs/03_guides/code/scrapling_project/my_actor/scraper.py
new file mode 100644
index 00000000..b840db82
--- /dev/null
+++ b/docs/03_guides/code/scrapling_project/my_actor/scraper.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from typing import Any
+
+from scrapling.fetchers import AsyncFetcher
+
+
+async def scrape_page(
+    url: str,
+    *,
+    proxy_url: str | None = None,
+) -> tuple[dict[str, Any], list[str]]:
+    """Fetch a single page with Scrapling and extract its data and links.
+
+    The page is fetched with Scrapling's asynchronous HTTP fetcher. The
+    `impersonate` and `stealthy_headers` options make the request look like it
+    comes from a real Chrome browser, which reduces the chance of being blocked.
+    The returned response is also a Scrapling selector, so it can be queried with
+    CSS selectors directly.
+    """
+    response = await AsyncFetcher.get(
+        url,
+        proxy=proxy_url,
+        impersonate='chrome',
+        stealthy_headers=True,
+        timeout=60,
+    )
+
+    # Extract the desired data using CSS selectors. The `::text` pseudo-element
+    # returns the text content of the matched elements.
+    data = {
+        'url': url,
+        'title': response.css('title::text').get(),
+        'h1s': response.css('h1::text').getall(),
+        'h2s': response.css('h2::text').getall(),
+        'h3s': response.css('h3::text').getall(),
+    }
+
+    # Collect absolute links from the page. The `::attr(href)` pseudo-selector
+    # reads the attribute and `response.urljoin` resolves it against the page URL.
+    links: list[str] = []
+    for href in response.css('a::attr(href)').getall():
+        link_url = response.urljoin(href)
+        if link_url.startswith(('http://', 'https://')):
+            links.append(link_url)
+
+    return data, links
diff --git a/pyproject.toml b/pyproject.toml
index d17bdc01..d8697219 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -181,6 +181,10 @@ indent-style = "space"
     # Local imports in Scrapy project.
     "TID252", # Prefer absolute imports over relative imports from parent modules
 ]
+"**/docs/**/scrapling_project/**" = [
+    # Local imports are mixed up with the Apify SDK.
+    "I001", # Import block is un-sorted or un-formatted
+]
 
 [tool.ruff.lint.flake8-quotes]
 docstring-quotes = "double"

From 2a41a3f3e19b1e664adcbe35a39bfdacc58e816d Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 12:00:53 +0200
Subject: [PATCH 3/5] docs: use Request.crawl_depth for depth tracking in
 Scrapling example

---
 .../code/scrapling_project/my_actor/main.py   | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/docs/03_guides/code/scrapling_project/my_actor/main.py b/docs/03_guides/code/scrapling_project/my_actor/main.py
index d2cd36e7..52e9ef4c 100644
--- a/docs/03_guides/code/scrapling_project/my_actor/main.py
+++ b/docs/03_guides/code/scrapling_project/my_actor/main.py
@@ -24,21 +24,18 @@ async def main() -> None:
         # Open the default request queue for handling URLs to be processed.
         request_queue = await Actor.open_request_queue()
 
-        # Enqueue the start URLs with an initial crawl depth of 0.
+        # Enqueue the start URLs. Their crawl depth defaults to 0.
         for start_url in start_urls:
             url = start_url.get('url')
             Actor.log.info(f'Enqueuing {url} ...')
-            request = Request.from_url(url, user_data={'depth': 0})
-            await request_queue.add_request(request)
+            await request_queue.add_request(Request.from_url(url))
 
         # Process the URLs from the request queue.
         while request := await request_queue.fetch_next_request():
             url = request.url
 
-            if not isinstance(request.user_data['depth'], (str, int)):
-                raise TypeError('Request.depth is an unexpected type.')
-
-            depth = int(request.user_data['depth'])
+            # Read the crawl depth tracked by the request itself.
+            depth = request.crawl_depth
             Actor.log.info(f'Scraping {url} (depth={depth}) ...')
 
             try:
@@ -53,14 +50,13 @@ async def main() -> None:
                 # Store the extracted data to the default dataset.
                 await Actor.push_data(data)
 
-                # If we are not too deep yet, enqueue the links we found.
+                # If we are not too deep yet, enqueue the links we found one
+                # level deeper than the current page.
                 if depth < max_depth:
                     for link_url in links:
                         Actor.log.info(f'Enqueuing {link_url} ...')
-                        new_request = Request.from_url(
-                            link_url,
-                            user_data={'depth': depth + 1},
-                        )
+                        new_request = Request.from_url(link_url)
+                        new_request.crawl_depth = depth + 1
                         await request_queue.add_request(new_request)
 
             except Exception:

From 910df14999f02c3f22e9fac77322148a4f0630e2 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 20:45:03 +0200
Subject: [PATCH 4/5] docs: renumber Scrapling guide to 07 and switch to a
 single-file example

---
 .../{09_scrapling.mdx => 07_scrapling.mdx}    |  74 +++--------
 docs/03_guides/code/07_scrapling.py           | 122 ++++++++++++++++++
 .../scraper.py => 07_scrapling_browser.py}    |  16 +--
 .../code/scrapling_browser_project/Dockerfile |  21 ---
 .../scrapling_project/my_actor/__init__.py    |   0
 .../scrapling_project/my_actor/__main__.py    |   8 --
 .../code/scrapling_project/my_actor/main.py   |  67 ----------
 .../scrapling_project/my_actor/scraper.py     |  47 -------
 8 files changed, 146 insertions(+), 209 deletions(-)
 rename docs/03_guides/{09_scrapling.mdx => 07_scrapling.mdx} (63%)
 create mode 100644 docs/03_guides/code/07_scrapling.py
 rename docs/03_guides/code/{scrapling_browser_project/my_actor/scraper.py => 07_scrapling_browser.py} (52%)
 delete mode 100644 docs/03_guides/code/scrapling_browser_project/Dockerfile
 delete mode 100644 docs/03_guides/code/scrapling_project/my_actor/__init__.py
 delete mode 100644 docs/03_guides/code/scrapling_project/my_actor/__main__.py
 delete mode 100644 docs/03_guides/code/scrapling_project/my_actor/main.py
 delete mode 100644 docs/03_guides/code/scrapling_project/my_actor/scraper.py

diff --git a/docs/03_guides/09_scrapling.mdx b/docs/03_guides/07_scrapling.mdx
similarity index 63%
rename from docs/03_guides/09_scrapling.mdx
rename to docs/03_guides/07_scrapling.mdx
index 3e76ebca..63e948e5 100644
--- a/docs/03_guides/09_scrapling.mdx
+++ b/docs/03_guides/07_scrapling.mdx
@@ -1,20 +1,16 @@
 ---
 id: scrapling
-title: Use Scrapling
+title: Adaptive scraping with Scrapling
 description: Build an Apify Actor that scrapes web pages using the Scrapling adaptive web scraping library.
 ---
 
 import CodeBlock from '@theme/CodeBlock';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
+import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock';
 
-import ScraplingMain from '!!raw-loader!./code/scrapling_project/my_actor/main.py';
-import ScraplingScraper from '!!raw-loader!./code/scrapling_project/my_actor/scraper.py';
-import ScraplingEntrypoint from '!!raw-loader!./code/scrapling_project/my_actor/__main__.py';
-import ScraplingBrowserScraper from '!!raw-loader!./code/scrapling_browser_project/my_actor/scraper.py';
-import ScraplingBrowserDockerfile from '!!raw-loader!./code/scrapling_browser_project/Dockerfile';
+import ScraplingExample from '!!raw-loader!roa-loader!./code/07_scrapling.py';
+import ScraplingBrowserScraper from '!!raw-loader!./code/07_scrapling_browser.py';
 
-In this guide, you'll learn how to use the [Scrapling](https://scrapling.readthedocs.io/) library in your Apify Actors.
+In this guide, you'll learn how to use the [Scrapling](https://scrapling.readthedocs.io/) library for adaptive web scraping in your Apify Actors.
 
 ## Introduction
 
@@ -50,42 +46,24 @@ The example Actor in this guide uses the HTTP `AsyncFetcher`, which is the simpl
 
 ## Example Actor
 
-The following Actor recursively scrapes titles from all linked pages, up to a user-defined maximum depth, starting from the URLs in the Actor input. It uses Scrapling's `AsyncFetcher` to fetch each page through [Apify Proxy](https://docs.apify.com/platform/proxy), and CSS selectors to extract the title, headings, and links.
-
-The code is split into three small modules, following the structure of the Apify Python Actor templates:
-
-- `my_actor/main.py` - The Actor's main coroutine. It handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy) and the [request queue](https://docs.apify.com/platform/storage/request-queue), and drives the crawl.
-- `my_actor/scraper.py` - The Scrapling-specific logic. A single `scrape_page` function fetches a page and returns the extracted data together with the links found on it.
-- `my_actor/__main__.py` - The entry point that runs the `main` coroutine with `asyncio`.
-
-<Tabs>
-    <TabItem value="main.py" label="my_actor/main.py">
-        <CodeBlock className="language-python">
-            {ScraplingMain}
-        </CodeBlock>
-    </TabItem>
-    <TabItem value="scraper.py" label="my_actor/scraper.py">
-        <CodeBlock className="language-python">
-            {ScraplingScraper}
-        </CodeBlock>
-    </TabItem>
-    <TabItem value="__main__.py" label="my_actor/__main__.py">
-        <CodeBlock className="language-python">
-            {ScraplingEntrypoint}
-        </CodeBlock>
-    </TabItem>
-</Tabs>
+The following Actor recursively scrapes data from linked pages on the same site, up to a user-defined maximum depth, starting from the URLs in the Actor input. It uses Scrapling's `AsyncFetcher` to fetch each page through [Apify Proxy](https://docs.apify.com/platform/proxy), and CSS selectors to extract the title, headings, and links.
+
+The whole Actor fits in a single file. A `scrape_page` helper holds the Scrapling-specific fetching and parsing, while the `main` coroutine handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy) and the [request queue](https://docs.apify.com/platform/storage/request-queue), and drives the crawl:
+
+<RunnableCodeBlock className="language-python" language="python">
+    {ScraplingExample}
+</RunnableCodeBlock>
 
 A few things worth pointing out:
 
-- Keeping the fetching and parsing in `scrape_page` separates the Scrapling-specific code from the Actor's orchestration logic. The function returns the extracted data together with the discovered links, so `my_actor/main.py` decides what to store and what to enqueue.
+- Keeping the fetching and parsing in `scrape_page` separates the Scrapling-specific code from the Actor's orchestration logic. The function returns the extracted data together with the discovered links, so `main` decides what to store and what to enqueue.
 - The response of `AsyncFetcher.get` is a Scrapling selector, so `response.css('title::text').get()` reads the page title and `response.css('a::attr(href)').getall()` returns every link's `href` in one call.
 - `response.urljoin(link_href)` resolves relative links against the page URL, so you can enqueue them directly.
 - The `impersonate='chrome'` and `stealthy_headers=True` options make the request look like it comes from a real Chrome browser, which - combined with Apify Proxy - reduces the chance of being blocked.
 
 ## Using Apify Proxy
 
-Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `my_actor/main.py` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `scrape_page` for every request, which forwards it to Scrapling's `proxy` argument.
+Running on the Apify platform gives your scraper access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `main` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `scrape_page` for every request, which forwards it to Scrapling's `proxy` argument.
 
 Scrapling accepts the proxy as a URL string (for example `http://user:pass@proxy.apify.com:8000`), which is exactly what `ProxyConfiguration.new_url` returns. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide. The browser-based fetchers accept the same `proxy` argument.
 
@@ -97,23 +75,13 @@ Scrapling accepts the proxy as a URL string (for example `http://user:pass@proxy
 scrapling install
 ```
 
-Switching the example Actor from HTTP to a real browser only takes two changes - the rest of the project, including `my_actor/main.py`, stays exactly the same:
-
-1. Swap the fetcher call in `my_actor/scraper.py` for `DynamicFetcher.async_fetch`. The parsing API is identical, so the data extraction is unchanged.
-2. Build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies, and run `scrapling install` during the build to download the browser binaries that Scrapling expects.
-
-<Tabs>
-    <TabItem value="scraper.py" label="my_actor/scraper.py">
-        <CodeBlock className="language-python">
-            {ScraplingBrowserScraper}
-        </CodeBlock>
-    </TabItem>
-    <TabItem value="Dockerfile" label="Dockerfile">
-        <CodeBlock className="language-docker">
-            {ScraplingBrowserDockerfile}
-        </CodeBlock>
-    </TabItem>
-</Tabs>
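+Switching the example Actor from HTTP to a real browser takes only one code change - swap the `AsyncFetcher.get` call in `scrape_page` for `DynamicFetcher.async_fetch`. The parsing API is identical, so the rest of the Actor stays exactly the same. The snippet below shows a simplified `scrape_page` that collects every absolute link; keep the same-host filter from the example above if the crawl should stay on one site: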
+
+<CodeBlock className="language-python">
+    {ScraplingBrowserScraper}
+</CodeBlock>
+
+To run this on the Apify platform, build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies, and run `scrapling install` during the Docker build to download the browser binaries that Scrapling expects.
 
 ## Conclusion
 
diff --git a/docs/03_guides/code/07_scrapling.py b/docs/03_guides/code/07_scrapling.py
new file mode 100644
index 00000000..49aab31b
--- /dev/null
+++ b/docs/03_guides/code/07_scrapling.py
@@ -0,0 +1,122 @@
+import asyncio
+from typing import Any
+from urllib.parse import urlsplit
+
+from scrapling.fetchers import AsyncFetcher
+
+from apify import Actor, Request
+from apify.storages import RequestQueue
+
+
+async def scrape_page(
+    url: str,
+    *,
+    proxy_url: str | None = None,
+) -> tuple[dict[str, Any], list[str]]:
+    """Fetch a page with Scrapling's HTTP fetcher and return data and links."""
+    # `impersonate` and `stealthy_headers` make the request look like Chrome.
+    response = await AsyncFetcher.get(
+        url,
+        proxy=proxy_url,
+        impersonate='chrome',
+        stealthy_headers=True,
+        timeout=60,
+    )
+
+    data = {
+        'url': url,
+        'title': response.css('title::text').get(),
+        'h1s': response.css('h1::text').getall(),
+        'h2s': response.css('h2::text').getall(),
+        'h3s': response.css('h3::text').getall(),
+    }
+
+    # Keep only absolute links on the same host.
+    links: list[str] = []
+    host = urlsplit(url).netloc
+    for href in response.css('a::attr(href)').getall():
+        link_url = response.urljoin(href)
+        if not link_url.startswith(('http://', 'https://')):
+            continue
+        if urlsplit(link_url).netloc == host:
+            links.append(link_url)
+
+    return data, links
+
+
+async def enqueue_links(
+    request_queue: RequestQueue,
+    links: list[str],
+    *,
+    depth: int,
+    max_depth: int,
+) -> None:
+    """Enqueue the links one level deeper, unless max_depth was reached."""
+    if depth >= max_depth:
+        return
+
+    for link_url in links:
+        Actor.log.info(f'Enqueuing {link_url} ...')
+        request = Request.from_url(link_url)
+        request.crawl_depth = depth + 1
+        await request_queue.add_request(request)
+
+
+async def main() -> None:
+    async with Actor:
+        # Read the Actor input.
+        actor_input = await Actor.get_input() or {}
+        start_urls = actor_input.get('startUrls', [{'url': 'https://crawlee.dev'}])
+        max_depth = actor_input.get('maxDepth', 1)
+
+        if not start_urls:
+            Actor.log.info('No start URLs specified in Actor input, exiting...')
+            await Actor.exit()
+
+        # Set up Apify Proxy and the request queue.
+        proxy_configuration = await Actor.create_proxy_configuration()
+        request_queue = await Actor.open_request_queue()
+
+        # Enqueue the start URLs (crawl depth defaults to 0).
+        for start_url in start_urls:
+            url = start_url.get('url')
+            Actor.log.info(f'Enqueuing start URL: {url}')
+            await request_queue.add_request(Request.from_url(url))
+
+        # Cap the crawl; raise or remove to follow more pages.
+        max_requests = 50
+        handled_requests = 0
+
+        while handled_requests < max_requests and (
+            request := await request_queue.fetch_next_request()
+        ):
+            handled_requests += 1
+            url = request.url
+            depth = request.crawl_depth
+            Actor.log.info(f'Scraping {url} (depth={depth}) ...')
+
+            try:
+                # Fresh proxy URL per request (None if no proxy).
+                proxy_url = None
+                if proxy_configuration:
+                    proxy_url = await proxy_configuration.new_url()
+
+                data, links = await scrape_page(url, proxy_url=proxy_url)
+                await Actor.push_data(data)
+                Actor.log.info(
+                    f'Stored data from {url} '
+                    f'(title={data["title"]!r}, {len(links)} links found).'
+                )
+                await enqueue_links(
+                    request_queue, links, depth=depth, max_depth=max_depth
+                )
+
+            except Exception:
+                Actor.log.exception(f'Cannot extract data from {url}.')
+
+            finally:
+                await request_queue.mark_request_as_handled(request)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py b/docs/03_guides/code/07_scrapling_browser.py
similarity index 52%
rename from docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py
rename to docs/03_guides/code/07_scrapling_browser.py
index fb7d4579..3eb50e24 100644
--- a/docs/03_guides/code/scrapling_browser_project/my_actor/scraper.py
+++ b/docs/03_guides/code/07_scrapling_browser.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import Any
 
 from scrapling.fetchers import DynamicFetcher
@@ -10,13 +8,8 @@ async def scrape_page(
     *,
     proxy_url: str | None = None,
 ) -> tuple[dict[str, Any], list[str]]:
-    """Fetch a single page in a real browser and extract its data and links.
-
-    `DynamicFetcher` drives a real browser via Playwright, so it can render
-    JavaScript-heavy pages. `network_idle` waits until the page stops making
-    network requests before the HTML is captured. Apart from the fetcher call,
-    everything else - including the parsing - is identical to the HTTP version.
-    """
+    """Fetch a page in a real browser with Scrapling and return data and links."""
+    # `network_idle` waits until the page stops making network requests.
     response = await DynamicFetcher.async_fetch(
         url,
         proxy=proxy_url,
@@ -24,8 +17,6 @@ async def scrape_page(
         network_idle=True,
     )
 
-    # Extract the desired data using CSS selectors. The `::text` pseudo-element
-    # returns the text content of the matched elements.
     data = {
         'url': url,
         'title': response.css('title::text').get(),
@@ -34,8 +25,7 @@ async def scrape_page(
         'h3s': response.css('h3::text').getall(),
     }
 
-    # Collect absolute links from the page. The `::attr(href)` pseudo-selector
-    # reads the attribute and `response.urljoin` resolves it against the page URL.
+    # Collect absolute links from the page.
     links: list[str] = []
     for href in response.css('a::attr(href)').getall():
         link_url = response.urljoin(href)
diff --git a/docs/03_guides/code/scrapling_browser_project/Dockerfile b/docs/03_guides/code/scrapling_browser_project/Dockerfile
deleted file mode 100644
index 38b30c60..00000000
--- a/docs/03_guides/code/scrapling_browser_project/Dockerfile
+++ /dev/null
@@ -1,21 +0,0 @@
-# Use the Apify Playwright base image, which already ships a browser together
-# with all of its system-level dependencies.
-FROM apify/actor-python-playwright:3.14-1.60.0
-
-# Copy just requirements.txt first to leverage the Docker build cache.
-COPY --chown=myuser:myuser requirements.txt ./
-RUN pip install -r requirements.txt
-
-# Download the browser binaries that Scrapling expects. The base image already
-# provides their system-level dependencies, so run this step as root and then
-# switch back to the unprivileged user.
-USER root
-RUN scrapling install
-USER myuser
-
-# Copy the rest of the source code and verify that it compiles.
-COPY --chown=myuser:myuser . ./
-RUN python -m compileall -q my_actor/
-
-# Specify how to launch the Actor.
-CMD ["python", "-m", "my_actor"]
diff --git a/docs/03_guides/code/scrapling_project/my_actor/__init__.py b/docs/03_guides/code/scrapling_project/my_actor/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/03_guides/code/scrapling_project/my_actor/__main__.py b/docs/03_guides/code/scrapling_project/my_actor/__main__.py
deleted file mode 100644
index 6aeaf3d5..00000000
--- a/docs/03_guides/code/scrapling_project/my_actor/__main__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-
-from .main import main
-
-if __name__ == '__main__':
-    asyncio.run(main())
diff --git a/docs/03_guides/code/scrapling_project/my_actor/main.py b/docs/03_guides/code/scrapling_project/my_actor/main.py
deleted file mode 100644
index 52e9ef4c..00000000
--- a/docs/03_guides/code/scrapling_project/my_actor/main.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-from apify import Actor, Request
-
-from .scraper import scrape_page
-
-
-async def main() -> None:
-    # Enter the context of the Actor.
-    async with Actor:
-        # Retrieve the Actor input, and use default values if not provided.
-        actor_input = await Actor.get_input() or {}
-        start_urls = actor_input.get('start_urls', [{'url': 'https://crawlee.dev'}])
-        max_depth = actor_input.get('max_depth', 1)
-
-        # Exit if no start URLs are provided.
-        if not start_urls:
-            Actor.log.info('No start URLs specified in Actor input, exiting...')
-            await Actor.exit()
-
-        # Create a proxy configuration that routes requests through Apify Proxy.
-        proxy_configuration = await Actor.create_proxy_configuration()
-
-        # Open the default request queue for handling URLs to be processed.
-        request_queue = await Actor.open_request_queue()
-
-        # Enqueue the start URLs. Their crawl depth defaults to 0.
-        for start_url in start_urls:
-            url = start_url.get('url')
-            Actor.log.info(f'Enqueuing {url} ...')
-            await request_queue.add_request(Request.from_url(url))
-
-        # Process the URLs from the request queue.
-        while request := await request_queue.fetch_next_request():
-            url = request.url
-
-            # Read the crawl depth tracked by the request itself.
-            depth = request.crawl_depth
-            Actor.log.info(f'Scraping {url} (depth={depth}) ...')
-
-            try:
-                # Get a fresh proxy URL for each request (None if no proxy set up).
-                proxy_url = None
-                if proxy_configuration:
-                    proxy_url = await proxy_configuration.new_url()
-
-                # Fetch the page and extract its data and nested links.
-                data, links = await scrape_page(url, proxy_url=proxy_url)
-
-                # Store the extracted data to the default dataset.
-                await Actor.push_data(data)
-
-                # If we are not too deep yet, enqueue the links we found one
-                # level deeper than the current page.
-                if depth < max_depth:
-                    for link_url in links:
-                        Actor.log.info(f'Enqueuing {link_url} ...')
-                        new_request = Request.from_url(link_url)
-                        new_request.crawl_depth = depth + 1
-                        await request_queue.add_request(new_request)
-
-            except Exception:
-                Actor.log.exception(f'Cannot extract data from {url}.')
-
-            finally:
-                # Mark the request as handled so it is not processed again.
-                await request_queue.mark_request_as_handled(request)
diff --git a/docs/03_guides/code/scrapling_project/my_actor/scraper.py b/docs/03_guides/code/scrapling_project/my_actor/scraper.py
deleted file mode 100644
index b840db82..00000000
--- a/docs/03_guides/code/scrapling_project/my_actor/scraper.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from __future__ import annotations
-
-from typing import Any
-
-from scrapling.fetchers import AsyncFetcher
-
-
-async def scrape_page(
-    url: str,
-    *,
-    proxy_url: str | None = None,
-) -> tuple[dict[str, Any], list[str]]:
-    """Fetch a single page with Scrapling and extract its data and links.
-
-    The page is fetched with Scrapling's asynchronous HTTP fetcher. The
-    `impersonate` and `stealthy_headers` options make the request look like it
-    comes from a real Chrome browser, which reduces the chance of being blocked.
-    The returned response is also a Scrapling selector, so it can be queried with
-    CSS selectors directly.
-    """
-    response = await AsyncFetcher.get(
-        url,
-        proxy=proxy_url,
-        impersonate='chrome',
-        stealthy_headers=True,
-        timeout=60,
-    )
-
-    # Extract the desired data using CSS selectors. The `::text` pseudo-element
-    # returns the text content of the matched elements.
-    data = {
-        'url': url,
-        'title': response.css('title::text').get(),
-        'h1s': response.css('h1::text').getall(),
-        'h2s': response.css('h2::text').getall(),
-        'h3s': response.css('h3::text').getall(),
-    }
-
-    # Collect absolute links from the page. The `::attr(href)` pseudo-selector
-    # reads the attribute and `response.urljoin` resolves it against the page URL.
-    links: list[str] = []
-    for href in response.css('a::attr(href)').getall():
-        link_url = response.urljoin(href)
-        if link_url.startswith(('http://', 'https://')):
-            links.append(link_url)
-
-    return data, links

From 404bdfb23d4e951b3d63f9660f8f6ef6e8107533 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Fri, 5 Jun 2026 21:02:44 +0200
Subject: [PATCH 5/5] chore: drop unused ruff ignore for the removed Scrapling
 project

---
 pyproject.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d8697219..d17bdc01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -181,10 +181,6 @@ indent-style = "space"
     # Local imports in Scrapy project.
     "TID252", # Prefer absolute imports over relative imports from parent modules
 ]
-"**/docs/**/scrapling_project/**" = [
-    # Local imports are mixed up with the Apify SDK.
-    "I001", # Import block is un-sorted or un-formatted
-]
 
 [tool.ruff.lint.flake8-quotes]
 docstring-quotes = "double"