diff --git a/python/README.md b/python/README.md
index 6d1c8128..52e8eebf 100644
--- a/python/README.md
+++ b/python/README.md
@@ -28,14 +28,46 @@ import asyncio
 from copilot import CopilotClient
 
 async def main():
-    # Create and start client
+    # Client automatically starts on enter and cleans up on exit
+    async with CopilotClient() as client:
+        # Create a session with automatic cleanup
+        async with await client.create_session({"model": "gpt-4o"}) as session:
+            # Wait for response using session.idle event
+            done = asyncio.Event()
+
+            def on_event(event):
+                if event.type.value == "assistant.message":
+                    print(event.data.content)
+                elif event.type.value == "session.idle":
+                    done.set()
+
+            session.on(on_event)
+
+            # Send a message and wait for completion
+            await session.send("What is 2+2?")
+            await done.wait()
+
+        # Session automatically disconnected here
+
+    # Client automatically stopped here
+
+asyncio.run(main())
+```
+
+### Manual Resource Management
+
+If you need more control over the lifecycle, you can call `client.start()`, `session.disconnect()`, and `client.stop()` manually:
+
+```python
+import asyncio
+from copilot import CopilotClient
+
+async def main():
     client = CopilotClient()
     await client.start()
 
-    # Create a session
-    session = await client.create_session({"model": "gpt-5"})
+    session = await client.create_session({"model": "gpt-4o"})
 
-    # Wait for response using session.idle event
     done = asyncio.Event()
 
     def on_event(event):
@@ -45,26 +77,16 @@ async def main():
             done.set()
 
     session.on(on_event)
-
-    # Send a message and wait for completion
     await session.send("What is 2+2?")
     await done.wait()
 
-    # Clean up
+    # Clean up manually
     await session.disconnect()
     await client.stop()
 
 asyncio.run(main())
 ```
 
-Sessions also support the `async with` context manager pattern for automatic cleanup:
-
-```python
-async with await client.create_session({"model": "gpt-5"}) as session:
-    await session.send("What is 2+2?")
-    # session is automatically disconnected when leaving the block
-```
-
 ## Features
 
 - ✅ Full JSON-RPC protocol support
@@ -73,6 +95,7 @@ async with await client.create_session({"model": "gpt-5"}) as session:
 - ✅ Session history with `get_messages()`
 - ✅ Type hints throughout
 - ✅ Async/await native
+- ✅ Async context manager support for automatic resource cleanup
 
 ## API Reference
 
@@ -81,24 +104,19 @@ async with await client.create_session({"model": "gpt-5"}) as session:
 ```python
 from copilot import CopilotClient, SubprocessConfig
 
-# Spawn a local CLI process (default)
-client = CopilotClient()  # uses bundled CLI, stdio transport
-await client.start()
+async with CopilotClient() as client:
+    async with await client.create_session({"model": "gpt-4o"}) as session:
+        def on_event(event):
+            print(f"Event: {event.type.value}")
 
-session = await client.create_session({"model": "gpt-5"})
+        session.on(on_event)
+        await session.send("Hello!")
 
-def on_event(event):
-    print(f"Event: {event['type']}")
-
-session.on(on_event)
-await session.send("Hello!")
-
-# ... wait for events ...
-
-await session.disconnect()
-await client.stop()
+        # ... wait for events ...
 ```
 
+> **Note:** For manual lifecycle management, see [Manual Resource Management](#manual-resource-management) above.
+
 ```python
 from copilot import CopilotClient, ExternalServerConfig
 
@@ -136,7 +154,7 @@ CopilotClient(
 
 **SessionConfig Options (for `create_session`):**
 
-- `model` (str): Model to use ("gpt-5", "claude-sonnet-4.5", etc.). **Required when using custom provider.**
+- `model` (str): Model to use ("gpt-4o", "claude-sonnet-4.5", etc.). **Required when using custom provider.**
 - `reasoning_effort` (str): Reasoning effort level for models that support it ("low", "medium", "high", "xhigh"). Use `list_models()` to check which models support this option.
 - `session_id` (str): Custom session ID
 - `tools` (list): Custom tools exposed to the CLI
@@ -192,10 +210,11 @@ async def lookup_issue(params: LookupIssueParams) -> str:
     issue = await fetch_issue(params.id)
     return issue.summary
 
-session = await client.create_session({
-    "model": "gpt-5",
+async with await client.create_session({
+    "model": "gpt-4o",
     "tools": [lookup_issue],
-})
+}) as session:
+    ...
 ```
 
 > **Note:** When using `from __future__ import annotations`, define Pydantic models at module level (not inside functions).
@@ -216,8 +235,8 @@ async def lookup_issue(invocation):
         "sessionLog": f"Fetched issue {issue_id}",
     }
 
-session = await client.create_session({
-    "model": "gpt-5",
+async with await client.create_session({
+    "model": "gpt-4o",
     "tools": [
         Tool(
             name="lookup_issue",
@@ -232,7 +251,8 @@ session = await client.create_session({
             handler=lookup_issue,
         )
     ],
-})
+}) as session:
+    ...
 ```
 
 The SDK automatically handles `tool.call`, executes your handler (sync or async), and responds with the final result when the tool completes.
@@ -292,44 +312,38 @@ import asyncio
 from copilot import CopilotClient
 
 async def main():
-    client = CopilotClient()
-    await client.start()
-
-    session = await client.create_session({
-        "model": "gpt-5",
-        "streaming": True
-    })
-
-    # Use asyncio.Event to wait for completion
-    done = asyncio.Event()
-
-    def on_event(event):
-        if event.type.value == "assistant.message_delta":
-            # Streaming message chunk - print incrementally
-            delta = event.data.delta_content or ""
-            print(delta, end="", flush=True)
-        elif event.type.value == "assistant.reasoning_delta":
-            # Streaming reasoning chunk (if model supports reasoning)
-            delta = event.data.delta_content or ""
-            print(delta, end="", flush=True)
-        elif event.type.value == "assistant.message":
-            # Final message - complete content
-            print("\n--- Final message ---")
-            print(event.data.content)
-        elif event.type.value == "assistant.reasoning":
-            # Final reasoning content (if model supports reasoning)
-            print("--- Reasoning ---")
-            print(event.data.content)
-        elif event.type.value == "session.idle":
-            # Session finished processing
-            done.set()
-
-    session.on(on_event)
-    await session.send("Tell me a short story")
-    await done.wait()  # Wait for streaming to complete
-
-    await session.disconnect()
-    await client.stop()
+    async with CopilotClient() as client:
+        async with await client.create_session({
+            "model": "gpt-4o",
+            "streaming": True,
+        }) as session:
+            # Use asyncio.Event to wait for completion
+            done = asyncio.Event()
+
+            def on_event(event):
+                if event.type.value == "assistant.message_delta":
+                    # Streaming message chunk - print incrementally
+                    delta = event.data.delta_content or ""
+                    print(delta, end="", flush=True)
+                elif event.type.value == "assistant.reasoning_delta":
+                    # Streaming reasoning chunk (if model supports reasoning)
+                    delta = event.data.delta_content or ""
+                    print(delta, end="", flush=True)
+                elif event.type.value == "assistant.message":
+                    # Final message - complete content
+                    print("\n--- Final message ---")
+                    print(event.data.content)
+                elif event.type.value == "assistant.reasoning":
+                    # Final reasoning content (if model supports reasoning)
+                    print("--- Reasoning ---")
+                    print(event.data.content)
+                elif event.type.value == "session.idle":
+                    # Session finished processing
+                    done.set()
+
+            session.on(on_event)
+            await session.send("Tell me a short story")
+            await done.wait()  # Wait for streaming to complete
 
 asyncio.run(main())
 ```
@@ -349,27 +363,28 @@ By default, sessions use **infinite sessions** which automatically manage contex
 
 ```python
 # Default: infinite sessions enabled with default thresholds
-session = await client.create_session({"model": "gpt-5"})
-
-# Access the workspace path for checkpoints and files
-print(session.workspace_path)
-# => ~/.copilot/session-state/{session_id}/
+async with await client.create_session({"model": "gpt-4o"}) as session:
+    # Access the workspace path for checkpoints and files
+    print(session.workspace_path)
+    # => ~/.copilot/session-state/{session_id}/
 
 # Custom thresholds
-session = await client.create_session({
-    "model": "gpt-5",
+async with await client.create_session({
+    "model": "gpt-4o",
     "infinite_sessions": {
         "enabled": True,
         "background_compaction_threshold": 0.80,  # Start compacting at 80% context usage
         "buffer_exhaustion_threshold": 0.95,  # Block at 95% until compaction completes
     },
-})
+}) as session:
+    ...
 
 # Disable infinite sessions
-session = await client.create_session({
-    "model": "gpt-5",
+async with await client.create_session({
+    "model": "gpt-4o",
     "infinite_sessions": {"enabled": False},
-})
+}) as session:
+    ...
 ```
 
 When enabled, sessions emit compaction events:
@@ -393,16 +408,15 @@ The SDK supports custom OpenAI-compatible API providers (BYOK - Bring Your Own K
 **Example with Ollama:**
 
 ```python
-session = await client.create_session({
+async with await client.create_session({
     "model": "deepseek-coder-v2:16b",  # Required when using custom provider
     "provider": {
         "type": "openai",
         "base_url": "http://localhost:11434/v1",  # Ollama endpoint
         # api_key not required for Ollama
     },
-})
-
-await session.send("Hello!")
+}) as session:
+    await session.send("Hello!")
 ```
 
 **Example with custom OpenAI-compatible API:**
@@ -410,14 +424,15 @@ await session.send("Hello!")
 ```python
 import os
 
-session = await client.create_session({
+async with await client.create_session({
     "model": "gpt-4",
     "provider": {
         "type": "openai",
         "base_url": "https://my-api.example.com/v1",
         "api_key": os.environ["MY_API_KEY"],
     },
-})
+}) as session:
+    ...
 ```
 
 **Example with Azure OpenAI:**
@@ -425,7 +440,7 @@ session = await client.create_session({
 ```python
 import os
 
-session = await client.create_session({
+async with await client.create_session({
     "model": "gpt-4",
     "provider": {
         "type": "azure",  # Must be "azure" for Azure endpoints, NOT "openai"
@@ -435,7 +450,8 @@ session = await client.create_session({
             "api_version": "2024-10-21",
         },
     },
-})
+}) as session:
+    ...
 ```
 
 > **Important notes:**
@@ -489,10 +505,11 @@ async def handle_user_input(request, invocation):
         "wasFreeform": True,  # Whether the answer was freeform (not from choices)
     }
 
-session = await client.create_session({
-    "model": "gpt-5",
+async with await client.create_session({
+    "model": "gpt-4o",
     "on_user_input_request": handle_user_input,
-})
+}) as session:
+    ...
 ```
 
 ## Session Hooks
@@ -536,8 +553,8 @@ async def on_error_occurred(input, invocation):
         "errorHandling": "retry",  # "retry", "skip", or "abort"
     }
 
-session = await client.create_session({
-    "model": "gpt-5",
+async with await client.create_session({
+    "model": "gpt-4o",
     "hooks": {
         "on_pre_tool_use": on_pre_tool_use,
         "on_post_tool_use": on_post_tool_use,
@@ -546,7 +563,8 @@ session = await client.create_session({
         "on_session_end": on_session_end,
         "on_error_occurred": on_error_occurred,
     },
-})
+}) as session:
+    ...
 ```
 
 **Available hooks:**
diff --git a/python/copilot/client.py b/python/copilot/client.py
index 0d8074fe..7ea20465 100644
--- a/python/copilot/client.py
+++ b/python/copilot/client.py
@@ -23,6 +23,7 @@
 import uuid
 from collections.abc import Awaitable, Callable
 from pathlib import Path
+from types import TracebackType
 from typing import Any, cast, overload
 
 from .generated.rpc import ServerRpc
@@ -256,6 +257,38 @@ def _parse_cli_url(self, url: str) -> tuple[str, int]:
 
         return (host, port)
 
+    async def __aenter__(self) -> "CopilotClient":
+        """
+        Enter the async context manager.
+
+        Automatically starts the CLI server and establishes a connection if not
+        already connected.
+
+        Returns:
+            The CopilotClient instance.
+
+        Example:
+            >>> async with CopilotClient() as client:
+            ...     async with await client.create_session({"model": "gpt-4o"}) as session:
+            ...         await session.send("Hello!")
+        """
+        await self.start()
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None = None,
+        exc_val: BaseException | None = None,
+        exc_tb: TracebackType | None = None,
+    ) -> None:
+        """
+        Exit the async context manager.
+
+        Performs graceful cleanup via stop() (destroys active sessions, stops the
+        CLI server). Returns None, so exceptions from the block still propagate.
+        """
+        await self.stop()
+
     async def start(self) -> None:
         """
         Start the CLI server and establish a connection.
diff --git a/python/copilot/session.py b/python/copilot/session.py
index e4a17f2f..be89da74 100644
--- a/python/copilot/session.py
+++ b/python/copilot/session.py
@@ -9,6 +9,7 @@
 import inspect
 import threading
 from collections.abc import Callable
+from types import TracebackType
 from typing import Any, Literal, cast
 
 from .generated.rpc import (
@@ -98,6 +99,7 @@ def __init__(self, session_id: str, client: Any, workspace_path: str | None = No
         self._hooks: SessionHooks | None = None
         self._hooks_lock = threading.Lock()
         self._rpc: SessionRpc | None = None
+        self._destroyed = False
 
     @property
     def rpc(self) -> SessionRpc:
@@ -673,20 +675,33 @@ async def disconnect(self) -> None:
 
         After calling this method, the session object can no longer be used.
 
+        This method is idempotent—calling it multiple times is safe and will
+        not raise an error if the session is already disconnected.
+
         Raises:
-            Exception: If the connection fails.
+            Exception: If the connection fails (on first disconnect call).
 
         Example:
             >>> # Clean up when done — session can still be resumed later
             >>> await session.disconnect()
         """
-        await self._client.request("session.destroy", {"sessionId": self.session_id})
+        # Ensure that the check and update of _destroyed are atomic so that
+        # only the first caller proceeds to send the destroy RPC.
         with self._event_handlers_lock:
-            self._event_handlers.clear()
-        with self._tool_handlers_lock:
-            self._tool_handlers.clear()
-        with self._permission_handler_lock:
-            self._permission_handler = None
+            if self._destroyed:
+                return
+            self._destroyed = True
+
+        try:
+            await self._client.request("session.destroy", {"sessionId": self.session_id})
+        finally:
+            # Clear handlers even if the request fails.
+            with self._event_handlers_lock:
+                self._event_handlers.clear()
+            with self._tool_handlers_lock:
+                self._tool_handlers.clear()
+            with self._permission_handler_lock:
+                self._permission_handler = None
 
     async def destroy(self) -> None:
         """
@@ -709,11 +724,32 @@ async def destroy(self) -> None:
         await self.disconnect()
 
     async def __aenter__(self) -> "CopilotSession":
-        """Enable use as an async context manager."""
+        """
+        Enter the async context manager.
+
+        Returns this instance as-is without performing any I/O. The session must
+        already be created (via CopilotClient.create_session or resume_session).
+
+        Returns:
+            The CopilotSession instance.
+
+        Example:
+            >>> async with await client.create_session() as session:
+            ...     await session.send("Hello!")
+        """
         return self
 
-    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
-        """Disconnect the session when exiting the context manager."""
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None = None,
+        exc_val: BaseException | None = None,
+        exc_tb: TracebackType | None = None,
+    ) -> None:
+        """
+        Exit the async context manager.
+
+        Awaits disconnect() to release the session. Exceptions from the block still propagate.
+        """
+        await self.disconnect()
 
     async def abort(self) -> None:
diff --git a/python/test_client.py b/python/test_client.py
index 9b7e8eb0..5f2d18bf 100644
--- a/python/test_client.py
+++ b/python/test_client.py
@@ -4,6 +4,8 @@
 This file is for unit tests. Where relevant, prefer to add e2e tests in e2e/*.py instead.
 """
 
+from unittest.mock import AsyncMock, patch
+
 import pytest
 
 from copilot import (
@@ -482,3 +484,55 @@ async def mock_request(method, params):
             assert captured["session.model.switchTo"]["modelId"] == "gpt-4.1"
         finally:
             await client.force_stop()
+
+
+class TestCopilotClientContextManager:  # async-with protocol; start()/stop() are mocked
+    @pytest.mark.asyncio
+    async def test_aenter_calls_start_and_returns_self(self):
+        client = CopilotClient(SubprocessConfig(cli_path=CLI_PATH))
+        with patch.object(client, "start", new_callable=AsyncMock) as mock_start:
+            result = await client.__aenter__()
+            mock_start.assert_awaited_once()
+            assert result is client  # `async with ... as client` must bind the client itself
+
+    @pytest.mark.asyncio
+    async def test_aexit_calls_stop(self):
+        client = CopilotClient(SubprocessConfig(cli_path=CLI_PATH))
+        with patch.object(client, "stop", new_callable=AsyncMock) as mock_stop:
+            await client.__aexit__(None, None, None)  # clean exit: no exception info
+            mock_stop.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_aexit_returns_none(self):
+        client = CopilotClient(SubprocessConfig(cli_path=CLI_PATH))
+        with patch.object(client, "stop", new_callable=AsyncMock):
+            result = await client.__aexit__(None, None, None)
+            assert result is None  # a falsy return means exceptions are never suppressed
+
+
+class TestCopilotSessionContextManager:  # CopilotSession async-with protocol; no live client
+    @pytest.mark.asyncio
+    async def test_aenter_returns_self(self):
+        from copilot.session import CopilotSession
+
+        session = CopilotSession.__new__(CopilotSession)  # skip __init__: no client needed here
+        result = await session.__aenter__()
+        assert result is session
+
+    @pytest.mark.asyncio
+    async def test_aexit_calls_disconnect(self):
+        from copilot.session import CopilotSession
+
+        session = CopilotSession.__new__(CopilotSession)
+        with patch.object(session, "disconnect", new_callable=AsyncMock) as mock_disconnect:
+            await session.__aexit__(None, None, None)  # clean exit: no exception info
+            mock_disconnect.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_aexit_returns_none(self):
+        from copilot.session import CopilotSession
+
+        session = CopilotSession.__new__(CopilotSession)
+        with patch.object(session, "disconnect", new_callable=AsyncMock):
+            result = await session.__aexit__(None, None, None)
+            assert result is None  # a falsy return means exceptions are never suppressed