diff --git a/.gitignore b/.gitignore
index 65a6a54..e249383 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
 node_modules/
 .env
+.venv-whisper/
+.whisper-models/
 eng.traineddata
 dist/
-.DS_Store
\ No newline at end of file
+.DS_Store
+*.log
diff --git a/README.md b/README.md
index e9de5af..7b7015e 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@
   <img src="https://img.shields.io/badge/Platform-Cross%20Platform-blue?style=flat-square" alt="Platform" />
   <img src="https://img.shields.io/badge/Stealth-100%25%20Invisible-red?style=flat-square" alt="Stealth" />
   <img src="https://img.shields.io/badge/AI-Gemini%20Powered-orange?style=flat-square" alt="AI" />
-  <img src="https://img.shields.io/badge/Speech-Azure%20Optional-blueviolet?style=flat-square" alt="Speech" />
+  <img src="https://img.shields.io/badge/Speech-Azure%20or%20Local%20Whisper-blueviolet?style=flat-square" alt="Speech" />
 </p>
 
 ---
@@ -53,7 +53,7 @@ https://github.com/user-attachments/assets/896a7140-1e85-405d-bfbe-e05c9f3a816b
 
 ### 🚀 **AI-Powered Intelligence**
 - **Direct Image Analysis**: Screenshots are analyzed by Gemini (no Tesseract OCR)
-- **Voice Commands**: Optional Azure Speech (Whisper questions, get instant answers)
+- **Voice Commands**: Optional Azure Speech or local OpenAI Whisper
 - **Context Memory**: Remembers entire interview conversation
 - **Multi-Language Support**: C++, Python, Java, JavaScript, C
 - **Smart Response Window**: Draggable with close button
@@ -68,7 +68,7 @@ https://github.com/user-attachments/assets/896a7140-1e85-405d-bfbe-e05c9f3a816b
 - **Floating Overlay Bar**: Compact command center with camera, mic, and skill selector
 - **Draggable Answer Window**: Move and resize AI response window anywhere
 - **Close Button**: Clean × button to close answer window when needed
-- **Auto-Hide Mic**: Microphone button appears only when Azure Speech is configured
+- **Auto-Hide Mic**: Microphone button appears only when a speech provider is available
 - **Interactive Chat**: Full conversation window with markdown support
 
 ### 🎨 **Visual Design**
@@ -133,7 +133,7 @@ https://github.com/user-attachments/assets/896a7140-1e85-405d-bfbe-e05c9f3a816b
 - [x] **Global shortcuts** (capture, visibility, interaction, chat, settings)
 - [x] **Session memory** and chat UI
 - [x] **Language picker** and DSA skill prompt
-- [x] **Optional Azure Speech** integration with auto‑hide mic
+- [x] **Optional Azure Speech / local Whisper** integration with auto‑hide mic
 - [x] **Multi‑monitor** and area capture APIs
 - [x] **Window binding** and positioning system
 - [x] **Settings management** with app icon/stealth modes
@@ -157,12 +157,22 @@ The setup script automatically handles configuration. You only need:
 # Required: Google Gemini API Key (setup script will ask for this)
 GEMINI_API_KEY=your_gemini_api_key_here
 
-# Optional: Azure Speech Recognition (add later if you want voice features)
+# Optional: Speech Recognition (pick one provider)
+SPEECH_PROVIDER=whisper
+
+# Azure option
 AZURE_SPEECH_KEY=your_azure_speech_key
 AZURE_SPEECH_REGION=your_region
+
+# Local Whisper option
+WHISPER_COMMAND=whisper
+WHISPER_MODEL_DIR=.whisper-models
+WHISPER_MODEL=base
+WHISPER_LANGUAGE=en
+WHISPER_SEGMENT_MS=4000
 ```
 
-**Note**: Speech recognition is completely optional. If Azure credentials are not provided, the microphone button will be automatically hidden from all interfaces.
+**Note**: Speech recognition is completely optional. If no configured provider is available, the microphone button will be automatically hidden from all interfaces.
 
 ## 🚀 Quick Start & Installation
 
@@ -187,7 +197,9 @@ AZURE_SPEECH_REGION=your_region
 
 **That's it!** The setup script will:
 - Install all dependencies automatically
-- Create and configure your `.env` file
+- Create your `.env` file from `env.example` if needed
+- Set up a local Whisper virtualenv in `.venv-whisper` (skip with `--skip-whisper`)
+- Configure `.env` to use local Whisper by default
 - Build the app (if needed)
 - Launch OpenCluely ready to use (if not works use npm install & then npm start)
 
@@ -196,6 +208,8 @@ AZURE_SPEECH_REGION=your_region
 - **Windows**: Use Git Bash (comes with Git for Windows), WSL, or any bash environment
 - **macOS/Linux**: Use your regular terminal
 - **All platforms**: No manual npm commands needed - the setup script handles everything
+- **Windows Whisper path**: `setup.sh` writes `WHISPER_COMMAND=.venv-whisper/Scripts/whisper.exe`
+- **macOS/Linux Whisper path**: `setup.sh` writes `WHISPER_COMMAND=.venv-whisper/bin/whisper`
 
 ### 🎛️ Setup Script Options
 
@@ -204,28 +218,50 @@ AZURE_SPEECH_REGION=your_region
 ./setup.sh --ci             # Use npm ci instead of npm install
 ./setup.sh --no-run         # Setup only, don't launch the app
 ./setup.sh --install-system-deps  # Install sox for microphone (optional)
+./setup.sh --skip-whisper  # Skip the local Whisper bootstrap
 ```
 
-### 🔧 **Optional: Azure Speech Setup** (For Voice Features)
+### 🔧 **Optional: Speech Setup** (For Voice Features)
+
+Voice recognition is optional. You can use either Azure Speech or local OpenAI Whisper.
 
-Voice recognition is completely optional. The setup script will create a `.env` file with just the required Gemini key. To add voice features:
+For the local Whisper path, `./setup.sh` now handles the full repo-local setup:
 
-1. Get Azure Speech credentials:
+- Creates `.venv-whisper`
+- Installs `openai-whisper`
+- Points `.env` at `.venv-whisper/bin/whisper` (`.venv-whisper/Scripts/whisper.exe` on Windows)
+- Creates `.whisper-models`
+- Runs `npm run test-speech`
+
+1. For Azure Speech:
    - Visit [Azure Portal](https://portal.azure.com/)
    - Create a Speech Service
    - Copy your key and region
 
-2. Add to your `.env` file:
+2. For local Whisper:
+   - Run `./setup.sh --install-system-deps`
+   - Or install required audio tools such as `ffmpeg` and `sox` yourself
+   - On Windows, install audio tooling separately and prefer Git Bash or WSL for `setup.sh`
+
+3. Add one provider to your `.env` file:
    ```env
-   # Already configured by setup script
    GEMINI_API_KEY=your_gemini_api_key_here
-
-   # Add these for voice features (optional)
+   SPEECH_PROVIDER=azure
    AZURE_SPEECH_KEY=your_azure_speech_key
    AZURE_SPEECH_REGION=your_region
    ```
 
-3. Restart the app - microphone buttons will now appear automatically
+   ```env
+   GEMINI_API_KEY=your_gemini_api_key_here
+   SPEECH_PROVIDER=whisper
+   WHISPER_COMMAND=whisper
+   WHISPER_MODEL_DIR=.whisper-models
+   WHISPER_MODEL=base
+   WHISPER_LANGUAGE=en
+   WHISPER_SEGMENT_MS=4000
+   ```
+
+4. Restart the app - microphone buttons will now appear automatically
 
 ## 🎮 How to Use
 
@@ -265,10 +301,11 @@ Voice recognition is completely optional. The setup script will create a `.env`
  - **Image Understanding**: DSA prompt is applied only for new image-based queries; chat messages don’t include the full prompt
  - **Multi-monitor & Area Capture**: Programmatic APIs allow targeting a display and optional rectangular crop for focused analysis
 
-#### 🔊 **Optional Voice Features** (Azure Speech)
-- **Real-time Transcription**: Speak questions naturally
+#### 🔊 **Optional Voice Features** (Azure Speech / Local Whisper)
+- **Chunked Local Transcription**: Local Whisper transcribes short recorded segments on your machine
+- **Real-time Transcription**: Azure Speech supports live interim recognition
 - **Listening Animation**: Visual feedback during recording
-- **Interim Results**: See transcription as you speak
+- **Interim Results**: Available with Azure Speech
 - **Auto-processing**: Instant AI responses to voice input
 ]
 ---
@@ -305,7 +342,8 @@ Voice recognition is completely optional. The setup script will create a `.env`
 
 - **Microphone/voice not working**
   - Voice is optional - ignore related warnings if you don't need it
-  - To enable: install `sox` (Linux/macOS) and add Azure keys to `.env`
+  - Azure mode: add valid Azure keys to `.env`
+  - Whisper mode: install `openai-whisper`, `ffmpeg`, and `sox`, then set `SPEECH_PROVIDER=whisper`
 
 </details>
 
@@ -341,7 +379,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## 🙏 Acknowledgments
 
 - **Google Gemini**: Powering AI intelligence
-- **Azure Speech**: Optional voice recognition
+- **Azure Speech / Whisper**: Optional voice recognition
 - **Electron**: Cross-platform desktop framework
 - **Community**: Amazing contributors and feedback
 
diff --git a/chat.html b/chat.html
index d986477..baff03c 100644
--- a/chat.html
+++ b/chat.html
@@ -4,10 +4,8 @@
     <meta charset="UTF-8" />
     <title>Chat</title>
     <link href="./src/styles/common.css" rel="stylesheet" />
-    <!-- Load Font Awesome via link instead of @import to avoid any raw CSS render issues -->
-    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" />
     <!-- PrismJS theme for syntax highlighting -->
-    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism-tomorrow.min.css" />
+    <link rel="stylesheet" href="./node_modules/prismjs/themes/prism-tomorrow.min.css" />
     <style>
       /* Font Awesome now loaded via <link> above */
 
@@ -706,11 +704,11 @@
     </div>
     <script src="lib/markdown.js"></script> 
     <!-- PrismJS core and autoloader for language components -->
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/prism.min.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/plugins/autoloader/prism-autoloader.min.js"></script>
+    <script src="./node_modules/prismjs/prism.min.js"></script>
+    <script src="./node_modules/prismjs/plugins/autoloader/prism-autoloader.min.js"></script>
     <script>
       // Configure Prism autoloader
-      try { if (window.Prism && Prism.plugins && Prism.plugins.autoloader) { Prism.plugins.autoloader.languages_path = 'https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/components/'; } } catch (_) {}
+      try { if (window.Prism && Prism.plugins && Prism.plugins.autoloader) { Prism.plugins.autoloader.languages_path = './node_modules/prismjs/components/'; } } catch (_) {}
     </script>
     <script>
       // Use electronAPI from preload script instead of direct require
@@ -1308,4 +1306,4 @@
       }
     </script>
   </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/env.example b/env.example
index 22ffa29..6edb941 100644
--- a/env.example
+++ b/env.example
@@ -2,95 +2,21 @@
 # Get your API key from: https://makersuite.google.com/app/apikey
 GEMINI_API_KEY=your_gemini_api_key_here
 
+# Speech Recognition Configuration
+# Choose one provider: azure or whisper
+SPEECH_PROVIDER=whisper
 
-# Azure Speech Services Configuration
-# Copy this file to .env and fill in your actual credentials
-
-# Your Azure Speech Service key (get this from Azure Portal)
+# Optional: Azure Speech Services Configuration
 AZURE_SPEECH_KEY=your_azure_speech_key_here
-
-# Your Azure Speech Service region (e.g., eastus, westeurope, japaneast)
 AZURE_SPEECH_REGION=your_azure_region_here
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-1. Two Sum
-Solved
-Easy
-Topics
-premium lock icon
-Companies
-Hint
-Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.
-
-You may assume that each input would have exactly one solution, and you may not use the same element twice.
-
-You can return the answer in any order.
-
- 
-
-Example 1:
-
-Input: nums = [2,7,11,15], target = 9
-Output: [0,1]
-Explanation: Because nums[0] + nums[1] == 9, we return [0, 1].
-Example 2:
-
-Input: nums = [3,2,4], target = 6
-Output: [1,2]
-Example 3:
-
-Input: nums = [3,3], target = 6
-Output: [0,1]
- 
-
-Constraints:
-
-2 <= nums.length <= 104
--109 <= nums[i] <= 109
--109 <= target <= 109
-Only one valid answer exists.
-
-
-
-
-
-
+# Optional: Local OpenAI Whisper Configuration
+# Requires a local Whisper CLI installation, for example:
+#   pip install openai-whisper
+#   brew install ffmpeg sox   (macOS; use your distribution's package manager on Linux)
+# Use `whisper`, `python3 -m whisper`, or on Windows `.venv-whisper/Scripts/whisper.exe`
+WHISPER_COMMAND=whisper
+WHISPER_MODEL_DIR=.whisper-models
+WHISPER_MODEL=base
+WHISPER_LANGUAGE=en
+WHISPER_SEGMENT_MS=4000
diff --git a/llm-response.html b/llm-response.html
index c72df0c..40c2669 100644
--- a/llm-response.html
+++ b/llm-response.html
@@ -4,12 +4,14 @@
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>OpenCluely</title>
-    <script src="https://cdn.tailwindcss.com"></script>
-    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-core.min.js"></script>
-    <script src="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/plugins/autoloader/prism-autoloader.min.js"></script>
-    <link href="https://cdn.jsdelivr.net/npm/prismjs@1.29.0/themes/prism-tomorrow.min.css" rel="stylesheet">
+    <script src="lib/markdown.js"></script>
     <style>
+        .text-white { color: white; }
+        .text-gray-300 { color: #d1d5db; }
+        .text-sm { font-size: 0.875rem; }
+        .font-medium { font-weight: 500; }
+        .mr-2 { margin-right: 0.5rem; }
+
         body {
             background: linear-gradient(135deg, rgba(0, 0, 0, 0.4) 0%, rgba(20, 20, 20, 0.5) 100%);;
             -webkit-app-region: no-drag;
@@ -490,7 +492,7 @@
             
             console.log('[LLM-RESPONSE] Initializing window...');
 
-            // Configure marked for better rendering
+            // Prefer marked when available, but fall back to the bundled markdown.js parser.
             if (typeof marked !== 'undefined') {
                 marked.setOptions({
                     highlight: function(code, lang) {
@@ -503,8 +505,10 @@
                     gfm: true
                 });
                 console.log('[LLM-RESPONSE] marked configured');
+            } else if (typeof markdown !== 'undefined' && typeof markdown.toHTML === 'function') {
+                console.log('[LLM-RESPONSE] using bundled markdown.js renderer');
             } else {
-                console.error('[LLM-RESPONSE] marked library not loaded!');
+                console.error('[LLM-RESPONSE] no markdown renderer available, falling back to plain text');
             }
 
             console.log('[LLM-RESPONSE] electronAPI exists:', !!window.electronAPI);
@@ -589,6 +593,18 @@
             }
         }
 
+        function renderMarkdown(text) { // Returns an HTML string for `text` from the first renderer found on the page.
+            if (typeof marked !== 'undefined') {
+                return marked.parse ? marked.parse(text) : marked(text); // Use marked.parse when it exists; otherwise call marked itself.
+            }
+
+            if (typeof markdown !== 'undefined' && typeof markdown.toHTML === 'function') {
+                return markdown.toHTML(text);
+            }
+
+            return escapeHtml(text).replace(/\n/g, '<br>'); // No renderer: output of escapeHtml() with newlines turned into <br>.
+        }
+
         function calculateContentMetrics(response, codeBlocks) {
             // Count lines in the response
             const lineCount = response.split('\n').length;
@@ -808,7 +824,7 @@
             textContent = textContent.replace(/\n\s*\n\s*\n/g, '\n\n').trim();
 
             // Render text content
-            const textHtml = (typeof marked !== 'undefined') ? (marked.parse ? marked.parse(textContent) : marked(textContent)) : textContent;
+            const textHtml = renderMarkdown(textContent);
             document.getElementById('text-content').innerHTML = textHtml;
 
             // Render code blocks
@@ -839,8 +855,10 @@
                 });
             }
 
-            // Highlight code
-            Prism.highlightAll();
+            // Highlight code if Prism is available
+            if (typeof Prism !== 'undefined') {
+                Prism.highlightAll();
+            }
         }
 
         function displayFullLayout(response) {
@@ -849,12 +867,14 @@
             document.getElementById('full-content').classList.remove('hidden');
 
             // Render full markdown
-            const html = (typeof marked !== 'undefined') ? (marked.parse ? marked.parse(response) : marked(response)) : response;
+            const html = renderMarkdown(response);
             const full = document.getElementById('full-markdown');
             full.innerHTML = html;
 
-            // Highlight any code
-            Prism.highlightAll();
+            // Highlight any code if Prism is available
+            if (typeof Prism !== 'undefined') {
+                Prism.highlightAll();
+            }
 
             // Add copy buttons to code blocks in full view
             const pres = full.querySelectorAll('pre');
@@ -1006,4 +1026,4 @@
         })();
     </script>
 </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/main.js b/main.js
index 0d4019d..1e2a1d4 100644
--- a/main.js
+++ b/main.js
@@ -4,6 +4,13 @@ const { app, BrowserWindow, globalShortcut, session, ipcMain } = require("electr
 const logger = require("./src/core/logger").createServiceLogger("MAIN");
 const config = require("./src/core/config");
 
+// Keep Chromium network noise out of the terminal; app-level logs still go through Winston.
+app.commandLine.appendSwitch("log-level", "3"); // NOTE(review): presumably limits Chromium's own logging to its most severe level — confirm what "3" maps to.
+app.commandLine.appendSwitch("disable-background-networking");
+app.commandLine.appendSwitch("disable-component-update");
+app.commandLine.appendSwitch("disable-domain-reliability");
+app.commandLine.appendSwitch("no-pings");
+
 // Services
 // Screen capture (image-based)
 const captureService = require("./src/services/capture.service");
@@ -62,6 +69,39 @@ class ApplicationController {
     this.setupServiceEventHandlers();
   }
 
+  handleSecondInstance() { // Brings the running app's windows forward; registered as the "second-instance" handler at the bottom of this file.
+    logger.info("Second instance launch detected; focusing existing windows");
+
+    const focusExistingWindows = () => { // Best-effort: failures are logged in the catch below, never rethrown.
+      try {
+        const mainWindow = windowManager.getWindow("main");
+        if (mainWindow) {
+          if (mainWindow.isMinimized && mainWindow.isMinimized()) { // Restore only when isMinimized exists and reports true.
+            mainWindow.restore();
+          }
+          windowManager.showAllWindows();
+          windowManager.showOnCurrentDesktop(mainWindow);
+          mainWindow.focus();
+          return;
+        }
+
+        if (this.isReady) { // No "main" window: call showAllWindows() only when this.isReady is set.
+          windowManager.showAllWindows();
+        }
+      } catch (error) {
+        logger.error("Failed to focus existing instance", {
+          error: error.message,
+        });
+      }
+    };
+
+    if (app.isReady()) { // Run immediately when Electron is ready; otherwise wait for app.whenReady().
+      focusExistingWindows();
+    } else {
+      app.whenReady().then(focusExistingWindows);
+    }
+  }
+
   async onAppReady() {
     // Force stealth mode IMMEDIATELY when app is ready
     app.setName("Terminal ");
@@ -1246,4 +1286,11 @@ class ApplicationController {
   }
 }
 
-new ApplicationController();
+const gotSingleInstanceLock = app.requestSingleInstanceLock(); // Ask Electron for the single-instance lock.
+
+if (!gotSingleInstanceLock) { // Lock not granted: quit instead of creating a controller.
+  app.quit();
+} else {
+  const controller = new ApplicationController();
+  app.on("second-instance", () => controller.handleSecondInstance()); // Hand each "second-instance" event to the controller.
+}
diff --git a/package-lock.json b/package-lock.json
index 80fa74f..3b6a72a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,12 +10,14 @@
       "hasInstallScript": true,
       "license": "ISC",
       "dependencies": {
+        "@fortawesome/fontawesome-free": "^7.2.0",
         "@google/generative-ai": "^0.24.1",
         "dotenv": "^16.3.1",
         "markdown": "^0.5.0",
         "marked": "^15.0.12",
         "microsoft-cognitiveservices-speech-sdk": "^1.40.0",
         "node-record-lpcm16": "^1.0.1",
+        "prismjs": "^1.30.0",
         "winston": "^3.17.0",
         "winston-daily-rotate-file": "^4.7.1"
       },
@@ -335,6 +337,15 @@
         "node": ">= 10.0.0"
       }
     },
+    "node_modules/@fortawesome/fontawesome-free": {
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/@fortawesome/fontawesome-free/-/fontawesome-free-7.2.0.tgz",
+      "integrity": "sha512-3DguDv/oUE+7vjMeTSOjCSG+KeawgVQOHrKRnvUuqYh1mfArrh7s+s8hXW3e4RerBA1+Wh+hBqf8sJNpqNrBWg==",
+      "license": "(CC-BY-4.0 AND OFL-1.1 AND MIT)",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/@google/generative-ai": {
       "version": "0.24.1",
       "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
@@ -3599,6 +3610,15 @@
         "node": ">=10.4.0"
       }
     },
+    "node_modules/prismjs": {
+      "version": "1.30.0",
+      "resolved": "https://registry.npmjs.org/prismjs/-/prismjs-1.30.0.tgz",
+      "integrity": "sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/process-nextick-args": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
diff --git a/package.json b/package.json
index 3d05680..0747dfa 100644
--- a/package.json
+++ b/package.json
@@ -4,9 +4,9 @@
   "description": "AI Problem Solving Assistant",
   "main": "main.js",
   "scripts": {
-    "start": "electron .",
-    "dev": "electron . --no-sandbox --disable-gpu",
-    "test-speech": "node test-azure-speech.js",
+    "start": "env -u ELECTRON_RUN_AS_NODE electron .",
+    "dev": "env -u ELECTRON_RUN_AS_NODE electron . --no-sandbox --disable-gpu",
+    "test-speech": "node scripts/test-speech.js",
     "build": "electron-builder",
     "build:mac": "electron-builder --mac",
     "build:win": "electron-builder --win",
@@ -30,12 +30,14 @@
   },
   "license": "ISC",
   "dependencies": {
+    "@fortawesome/fontawesome-free": "^7.2.0",
     "@google/generative-ai": "^0.24.1",
     "dotenv": "^16.3.1",
     "markdown": "^0.5.0",
     "marked": "^15.0.12",
     "microsoft-cognitiveservices-speech-sdk": "^1.40.0",
     "node-record-lpcm16": "^1.0.1",
+    "prismjs": "^1.30.0",
     "winston": "^3.17.0",
     "winston-daily-rotate-file": "^4.7.1"
   },
diff --git a/preload.js b/preload.js
index ee51014..62f3985 100644
--- a/preload.js
+++ b/preload.js
@@ -47,10 +47,6 @@ contextBridge.exposeInMainWorld('electronAPI', {
   quit: () => {
     try {
       ipcRenderer.send('quit-app');
-      // Also try the app quit method
-      setTimeout(() => {
-        require('electron').app.quit();
-      }, 100);
     } catch (error) {
       console.error('Error in quit:', error);
     }
@@ -134,4 +130,4 @@ contextBridge.exposeInMainWorld('api', {
             ipcRenderer.on(channel, (event, ...args) => func(...args));
         }
     }
-});
\ No newline at end of file
+});
diff --git a/scripts/test-speech.js b/scripts/test-speech.js
new file mode 100644
index 0000000..2518d7a
--- /dev/null
+++ b/scripts/test-speech.js
@@ -0,0 +1,25 @@
+require('dotenv').config();
+
+const speechService = require('../src/services/speech.service');
+
+async function main() { // Prints the speech service status, then runs its connection test.
+  const status = speechService.getStatus();
+
+  console.log('Speech provider:', status.provider);
+  console.log('Initialized:', status.isInitialized);
+  console.log('Available:', speechService.isAvailable());
+  console.log('Effective settings:', JSON.stringify(status.effectiveSettings, null, 2));
+
+  try {
+    const connection = await speechService.testConnection();
+    console.log('Connection test:', JSON.stringify(connection, null, 2));
+  } catch (error) {
+    console.error('Connection test failed:', error.message);
+    process.exitCode = 1; // Record the failure in the exit code without calling process.exit() here.
+  }
+}
+
+main().catch((error) => { // Handles any rejection that main() did not catch itself.
+  console.error(error);
+  process.exit(1);
+});
diff --git a/settings.html b/settings.html
index 100c51d..e740227 100644
--- a/settings.html
+++ b/settings.html
@@ -5,7 +5,6 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>Settings</title>
     <link href="./src/styles/common.css" rel="stylesheet">
-    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css" integrity="sha512-DTOQO9RWCH3ppGqcWaEA1BIZOC6xxalwEsw9c2QQeAIftl+Vegovlnee1c9QX4TctnWMn13TZye+giMm8e2LwA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
     <style>
         * {
             margin: 0;
@@ -254,6 +253,23 @@
             color: #ffffff;
             padding: 8px;
         }
+
+        .settings-stack {
+            display: flex;
+            flex-direction: column;
+            gap: 12px;
+            width: 100%;
+        }
+
+        .settings-note {
+            font-size: 11px;
+            color: rgba(255, 255, 255, 0.65);
+            line-height: 1.4;
+        }
+
+        .settings-note code {
+            color: rgba(255, 255, 255, 0.9);
+        }
     </style>
 </head>
 <body>
@@ -336,6 +352,16 @@
                     Speech Recognition
                 </div>
                 <div class="settings-section-content">
+                    <div class="settings-item">
+                        <div>
+                            <div class="settings-item-label">Speech Provider</div>
+                            <div class="settings-item-description">Choose Azure Speech or a local OpenAI Whisper CLI</div>
+                        </div>
+                        <select class="input-field" id="speechProvider">
+                            <option value="azure">Azure Speech</option>
+                            <option value="whisper">Local Whisper</option>
+                        </select>
+                    </div>
                     <div class="settings-item">
                         <div>
                             <div class="settings-item-label">Azure Speech Key</div>
@@ -350,6 +376,39 @@
                         </div>
                         <input type="text" class="input-field" id="azureRegion" placeholder="e.g. eastus">
                     </div>
+                    <div class="settings-item">
+                        <div class="settings-stack">
+                            <div>
+                                <div class="settings-item-label">Whisper Command</div>
+                                <div class="settings-item-description">CLI command for local Whisper, such as <code>whisper</code> or <code>python3 -m whisper</code></div>
+                            </div>
+                            <input type="text" class="input-field" id="whisperCommand" placeholder="whisper" style="max-width: 100%;">
+                        </div>
+                    </div>
+                    <div class="settings-item">
+                        <div>
+                            <div class="settings-item-label">Whisper Model</div>
+                            <div class="settings-item-description">Local model name used by the Whisper CLI</div>
+                        </div>
+                        <input type="text" class="input-field" id="whisperModel" placeholder="base">
+                    </div>
+                    <div class="settings-item">
+                        <div>
+                            <div class="settings-item-label">Whisper Language</div>
+                            <div class="settings-item-description">Language code for local transcription</div>
+                        </div>
+                        <input type="text" class="input-field" id="whisperLanguage" placeholder="en">
+                    </div>
+                    <div class="settings-item">
+                        <div>
+                            <div class="settings-item-label">Whisper Segment Length</div>
+                            <div class="settings-item-description">Chunk size in milliseconds for local transcription</div>
+                        </div>
+                        <input type="number" class="input-field" id="whisperSegmentMs" placeholder="4000" min="2000" step="500">
+                    </div>
+                    <div class="settings-note">
+                        Local Whisper runs on this machine and needs a Whisper CLI installed. These settings apply immediately for the current app session; use <code>.env</code> for startup defaults.
+                    </div>
                 </div>
             </div>
             
@@ -389,4 +448,4 @@
 
     <script src="./src/ui/settings-window.js"></script>
 </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/setup.sh b/setup.sh
index 4990721..d3cd481 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,16 +1,22 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
-# OpenCluely one-shot setup: install deps, (optionally) build, and run
-# Works on macOS, Linux, and Windows (Git Bash / MSYS2 / Cygwin)
-
-# Defaults
 DO_BUILD=0
 DO_RUN=1
 USE_CI=0
 INSTALL_SYSTEM_DEPS=0
+SETUP_WHISPER=1
+WHISPER_MODEL="${WHISPER_MODEL:-base}"
+WHISPER_LANGUAGE="${WHISPER_LANGUAGE:-en}"
+WHISPER_SEGMENT_MS="${WHISPER_SEGMENT_MS:-4000}"
+WHISPER_VENV_DIR=".venv-whisper"
+WHISPER_MODEL_DIR=".whisper-models"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 OS_NAME="unknown"
 PLATFORM_BUILD_SCRIPT="build"
+PYTHON_BIN="python3"
+WHISPER_PIP_PATH=""
+WHISPER_COMMAND_PATH=""
 
 print_header() {
   echo "========================================"
@@ -23,25 +29,30 @@ usage() {
 Usage: ./setup.sh [options]
 
 This script will:
-1. Install dependencies
-2. Create .env file (if needed) 
-3. Guide you to add your Gemini API key
-4. Optionally build the app
-5. Start OpenCluely
+1. Create .env from env.example when needed
+2. Install Node dependencies
+3. Optionally set up local Whisper in ${WHISPER_VENV_DIR}
+4. Optionally install system audio dependencies
+5. Optionally build the app
+6. Optionally run OpenCluely
 
 Options:
-  --build                 Build a distributable for this OS (electron-builder)
+  --build                 Build a distributable for this OS
   --no-run                Do not start the app after setup
   --run                   Start the app after setup (default)
-  --ci                    Use 'npm ci' instead of 'npm install' if lockfile exists
-  --install-system-deps   Attempt to install required system dependencies (sox) where possible
+  --ci                    Use 'npm ci' instead of 'npm install'
+  --install-system-deps   Attempt to install sox where possible
+  --skip-whisper          Skip local Whisper environment setup
   -h, --help              Show this help
 
 Environment variables:
-  GEMINI_API_KEY          If provided, will be written into .env (skips manual setup)
+  GEMINI_API_KEY          If provided, writes into .env
+  WHISPER_MODEL           Whisper model to configure (default: base)
+  WHISPER_LANGUAGE        Whisper language to configure (default: en)
+  WHISPER_SEGMENT_MS      Segment size in ms (default: 4000)
 
-Example with API key:
-  GEMINI_API_KEY=your_key_here ./setup.sh
+Example:
+  GEMINI_API_KEY=your_key_here ./setup.sh --install-system-deps
 EOF
 }
 
@@ -52,178 +63,210 @@ for arg in "$@"; do
     --run) DO_RUN=1 ;;
     --ci) USE_CI=1 ;;
     --install-system-deps) INSTALL_SYSTEM_DEPS=1 ;;
+    --skip-whisper) SETUP_WHISPER=0 ;;
     -h|--help) usage; exit 0 ;;
     *) echo "Unknown option: $arg"; usage; exit 1 ;;
   esac
-  shift || true
 done
 
 print_header
+cd "$SCRIPT_DIR"
 
-# Detect OS
-UNAME_OUT=$(uname -s || echo "unknown")
-case "$UNAME_OUT" in
-  Linux*)   OS_NAME="linux" ;;
-  Darwin*)  OS_NAME="macos" ;;
-  CYGWIN*|MINGW*|MSYS*) OS_NAME="windows" ;;
-  *)        OS_NAME="unknown" ;;
- esac
+detect_os() {  # Set OS_NAME from uname, then PLATFORM_BUILD_SCRIPT, PYTHON_BIN and the Whisper venv tool paths.
+  local uname_out
+  uname_out=$(uname -s || echo "unknown")  # a failing uname yields "unknown"
+  case "$uname_out" in
+    Linux*) OS_NAME="linux" ;;
+    Darwin*) OS_NAME="macos" ;;
+    CYGWIN*|MINGW*|MSYS*) OS_NAME="windows" ;;  # uname prefixes mapped to Windows
+    *) OS_NAME="unknown" ;;
+  esac
 
-echo "Detected OS: $OS_NAME"
+  case "$OS_NAME" in  # npm script name that build_app passes to "npm run"
+    macos) PLATFORM_BUILD_SCRIPT="build:mac" ;;
+    windows) PLATFORM_BUILD_SCRIPT="build:win" ;;
+    linux) PLATFORM_BUILD_SCRIPT="build:linux" ;;
+    *) PLATFORM_BUILD_SCRIPT="build" ;;
+  esac
 
-# Map build script per platform (optional)
-case "$OS_NAME" in
-  macos) PLATFORM_BUILD_SCRIPT="build:mac" ;;
-  windows) PLATFORM_BUILD_SCRIPT="build:win" ;;
-  linux) PLATFORM_BUILD_SCRIPT="build:linux" ;;
-  *) PLATFORM_BUILD_SCRIPT="build" ;;
- esac
-
-# Check Node & npm
-if ! command -v node >/dev/null 2>&1; then
-  echo "Error: Node.js is not installed or not in PATH. Please install Node 18+ and retry."
-  exit 1
-fi
-if ! command -v npm >/dev/null 2>&1; then
-  echo "Error: npm is not installed or not in PATH."
-  exit 1
-fi
+  case "$OS_NAME" in  # Python binary name and venv executable locations per OS
+    windows)
+      PYTHON_BIN="python"
+      WHISPER_PIP_PATH="${WHISPER_VENV_DIR}/Scripts/pip.exe"
+      WHISPER_COMMAND_PATH="${WHISPER_VENV_DIR}/Scripts/whisper.exe"
+      ;;
+    *)  # every other OS value, including "unknown", uses the bin/ layout
+      PYTHON_BIN="python3"
+      WHISPER_PIP_PATH="${WHISPER_VENV_DIR}/bin/pip"
+      WHISPER_COMMAND_PATH="${WHISPER_VENV_DIR}/bin/whisper"
+      ;;
+  esac
+}
 
-echo "Node: $(node -v)"
-echo "npm:  $(npm -v)"
+require_command() {  # Exit 1 with "Error: <message>" unless "command -v" can resolve <cmd>.
+  local cmd="$1"  # command name to look up
+  local message="$2"  # text printed after "Error: " when the lookup fails
+  if ! command -v "$cmd" >/dev/null 2>&1; then
+    echo "Error: ${message}"
+    exit 1
+  fi
+}
 
-# Install system dependencies (optional best-effort)
-if [[ "$INSTALL_SYSTEM_DEPS" -eq 1 ]]; then
-  echo "Attempting to install system dependencies (best effort)"
-  if ! command -v sox >/dev/null 2>&1; then
-    case "$OS_NAME" in
-      macos)
-        if command -v brew >/dev/null 2>&1; then
-          echo "Installing sox via Homebrew..."
-          brew install sox || echo "Could not install sox via brew. You can install it manually: brew install sox"
-        else
-          echo "Homebrew not found. Install sox manually: https://formulae.brew.sh/formula/sox"
-        fi
-        ;;
-      linux)
-        if command -v apt-get >/dev/null 2>&1; then
-          echo "Installing sox via apt-get (sudo may prompt)..."
-          sudo apt-get update -y && sudo apt-get install -y sox || echo "Could not install sox via apt-get."
-        elif command -v dnf >/dev/null 2>&1; then
-          echo "Installing sox via dnf (sudo may prompt)..."
-          sudo dnf install -y sox || echo "Could not install sox via dnf."
-        elif command -v pacman >/dev/null 2>&1; then
-          echo "Installing sox via pacman (sudo may prompt)..."
-          sudo pacman -S --noconfirm sox || echo "Could not install sox via pacman."
-        else
-          echo "Unknown package manager. Please install 'sox' manually."
-        fi
-        ;;
-      windows)
-        echo "On Windows, install sox via Chocolatey (Admin PowerShell): choco install sox"
-        ;;
-      *)
-        echo "Unknown OS; please install 'sox' manually if you need microphone capture."
-        ;;
-    esac
-  else
-    echo "sox already installed."
+ensure_env_file() {  # Create .env from env.example when .env is absent; exit 1 if neither file exists.
+  if [[ ! -f .env ]]; then  # an existing .env is left untouched
+    if [[ -f env.example ]]; then
+      echo "Creating .env from env.example"
+      cp env.example .env
+    else
+      echo "Error: env.example is missing"
+      exit 1
+    fi
   fi
-fi
+}
 
-# Project root
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd "$SCRIPT_DIR"
+upsert_env() {  # Set KEY=VALUE in .env: rewrite the first existing KEY= line, or append a new one.
+  local key="$1"  # variable name as it appears at the start of a .env line
+  local value="$2"  # written verbatim; may be empty or contain "/", "$", "@", "\"
 
-# Ensure .env exists and has API key
-ENV_NEEDS_CONFIG=0
-if [[ ! -f .env ]]; then
-  if [[ -f env.example ]]; then
-    echo "Creating .env from env.example"
-    cp env.example .env
-    ENV_NEEDS_CONFIG=1
+  if grep -q "^${key}=" .env 2>/dev/null; then  # key/value reach perl via the environment, never as perl source
+    UPSERT_KEY="$key" UPSERT_VALUE="$value" perl -0pi -e 's/^\Q$ENV{UPSERT_KEY}\E=.*$/$ENV{UPSERT_KEY}=$ENV{UPSERT_VALUE}/m' .env
   else
-    echo "Creating new .env file"
-    cat > .env << 'EOF'
-# Google Gemini API Configuration
-# Get your API key from: https://aistudio.google.com/
-GEMINI_API_KEY=your_gemini_api_key_here
-
-# Optional: Azure Speech Services Configuration
-# AZURE_SPEECH_KEY=your_azure_speech_key_here
-# AZURE_SPEECH_REGION=your_azure_region_here
-EOF
-    ENV_NEEDS_CONFIG=1
+    printf "%s=%s\n" "$key" "$value" >> .env  # key not present yet: append it
   fi
-fi
-
-# If GEMINI_API_KEY is provided via env and .env lacks it, append it
-if [[ -n "${GEMINI_API_KEY:-}" ]]; then
-  if ! grep -q '^GEMINI_API_KEY=' .env 2>/dev/null; then
-    echo "GEMINI_API_KEY is set in the environment; writing to .env"
-    printf "GEMINI_API_KEY=%s\n" "$GEMINI_API_KEY" >> .env
-    ENV_NEEDS_CONFIG=0
+}
+
+ensure_gemini_key() {  # Write GEMINI_API_KEY from the environment into .env, prompt if unset, exit 1 if still unset.
+  if [[ -n "${GEMINI_API_KEY:-}" ]]; then  # a key exported by the caller overrides whatever .env holds
+    upsert_env "GEMINI_API_KEY" "$GEMINI_API_KEY"
   fi
-fi
-
-# Check if API key is configured
-if [[ "$ENV_NEEDS_CONFIG" -eq 1 ]] || grep -q "your_gemini_api_key_here" .env 2>/dev/null; then
-  echo ""
-  echo "=========================================="
-  echo " ⚠️  API KEY REQUIRED"
-  echo "=========================================="
-  echo ""
-  echo "OpenCluely needs a Google Gemini API key to work."
-  echo ""
-  echo "Steps to get your API key:"
-  echo "1. Visit: https://aistudio.google.com/"
-  echo "2. Click 'Create API Key'"
-  echo "3. Copy the generated key"
-  echo ""
-  echo "Then edit your .env file and replace 'your_gemini_api_key_here' with your actual key:"
-  echo ""
-  echo "GEMINI_API_KEY=your_actual_api_key_here"
-  echo ""
-  echo "You can edit .env with any text editor:"
-  echo "  nano .env      (Linux/macOS)"
-  echo "  notepad .env   (Windows)"
-  echo "  code .env      (VS Code)"
-  echo ""
-  read -p "Press Enter after you've added your API key to continue..."
-  echo ""
-fi
-
-# Install node dependencies
-if [[ -f package-lock.json && "$USE_CI" -eq 1 ]]; then
-  echo "Installing dependencies with npm ci"
-  npm ci
-else
-  echo "Installing dependencies with npm install"
-  npm install
-fi
-
-# Build (optional)
-if [[ "$DO_BUILD" -eq 1 ]]; then
-  echo "Building app for $OS_NAME via npm run $PLATFORM_BUILD_SCRIPT"
-  npm run "$PLATFORM_BUILD_SCRIPT"
-fi
-
-# Run (default)
-if [[ "$DO_RUN" -eq 1 ]]; then
-  # Final validation before starting
-  if grep -q "your_gemini_api_key_here" .env 2>/dev/null; then
+
+  if ! grep -q '^GEMINI_API_KEY=' .env 2>/dev/null || grep -q 'your_gemini_api_key_here' .env 2>/dev/null; then
     echo ""
-    echo "❌ Error: API key not configured!"
-    echo "Please edit .env and replace 'your_gemini_api_key_here' with your actual Gemini API key."
-    echo "Get your key from: https://aistudio.google.com/"
+    echo "=========================================="
+    echo " API KEY REQUIRED"
+    echo "=========================================="
     echo ""
-    echo "Then run the setup script again:"
-    echo "./setup.sh"
+    echo "Add your Gemini API key to .env and rerun this script if needed."
+    echo "Get a key from: https://aistudio.google.com/"
+    echo ""
+    read -r -p "Press Enter after you've updated .env..." || true  # EOF on non-interactive stdin falls through to the check below
+  fi
+
+  if ! grep -q '^GEMINI_API_KEY=' .env 2>/dev/null || grep -q 'your_gemini_api_key_here' .env 2>/dev/null; then  # same test as the prompt condition
+    echo "Error: GEMINI_API_KEY is still not configured in .env"
     exit 1
   fi
-  
-  echo "Starting app (npm start)"
-  npm start
-else
-  echo "Setup complete. Skipping run."
-fi
+}
+
+install_system_deps() {  # Best-effort install of sox when INSTALL_SYSTEM_DEPS is 1; failures only print a hint.
+  if [[ "$INSTALL_SYSTEM_DEPS" -ne 1 ]]; then  # set to 1 by --install-system-deps
+    return
+  fi
+
+  echo "Attempting to install system audio dependencies"
+
+  if command -v sox >/dev/null 2>&1; then  # nothing to do when sox already resolves
+    echo "sox already installed"
+    return
+  fi
+
+  case "$OS_NAME" in
+    macos)
+      if command -v brew >/dev/null 2>&1; then
+        brew install sox || echo "Could not install sox automatically. Install it manually with: brew install sox"
+      else
+        echo "Homebrew not found. Install sox manually."
+      fi
+      ;;
+    linux)  # first package manager found wins: apt-get, then dnf, then pacman
+      if command -v apt-get >/dev/null 2>&1; then
+        sudo apt-get update -y && sudo apt-get install -y sox || echo "Could not install sox via apt-get"
+      elif command -v dnf >/dev/null 2>&1; then
+        sudo dnf install -y sox || echo "Could not install sox via dnf"
+      elif command -v pacman >/dev/null 2>&1; then
+        sudo pacman -S --noconfirm sox || echo "Could not install sox via pacman"
+      else
+        echo "Unknown package manager. Install sox manually."
+      fi
+      ;;
+    windows)  # no automatic install on Windows; only prints instructions
+      echo "Install sox manually on Windows, for example via Chocolatey: choco install sox"
+      ;;
+    *)
+      echo "Unknown OS. Install sox manually if you want microphone capture."
+      ;;
+  esac
+}
+
+install_node_deps() {  # Install Node dependencies with npm ci or npm install.
+  if [[ -f package-lock.json && "$USE_CI" -eq 1 ]]; then  # npm ci only when --ci was passed and a lockfile exists
+    echo "Installing Node dependencies with npm ci"
+    npm ci
+  else
+    echo "Installing Node dependencies with npm install"
+    npm install
+  fi
+}
+
+setup_whisper_env() {  # Create the Whisper venv, install openai-whisper, record settings in .env, run the smoke test.
+  if [[ "$SETUP_WHISPER" -ne 1 ]]; then  # set to 0 by --skip-whisper
+    echo "Skipping local Whisper setup"
+    return
+  fi
+
+  require_command "$PYTHON_BIN" "Python 3 is required for local Whisper setup."
+
+  if [[ ! -d "$WHISPER_VENV_DIR" ]]; then  # an existing venv directory is reused as-is
+    echo "Creating Whisper virtual environment at $WHISPER_VENV_DIR"
+    "$PYTHON_BIN" -m venv "$WHISPER_VENV_DIR"
+  fi
+
+  echo "Installing local Whisper into $WHISPER_VENV_DIR"
+  "${WHISPER_PIP_PATH%/*}/python" -m pip install --upgrade pip  # venv python next to pip; pip.exe cannot replace itself on Windows
+  "$WHISPER_PIP_PATH" install openai-whisper
+
+  mkdir -p "$WHISPER_MODEL_DIR"
+
+  upsert_env "SPEECH_PROVIDER" "whisper"
+  upsert_env "AZURE_SPEECH_KEY" ""  # NOTE(review): blanks any Azure key/region already in .env
+  upsert_env "AZURE_SPEECH_REGION" ""
+  upsert_env "WHISPER_COMMAND" "${WHISPER_COMMAND_PATH}"
+  upsert_env "WHISPER_MODEL_DIR" "${WHISPER_MODEL_DIR}"
+  upsert_env "WHISPER_MODEL" "${WHISPER_MODEL:-base}"  # fallbacks match the defaults listed in usage
+  upsert_env "WHISPER_LANGUAGE" "${WHISPER_LANGUAGE:-en}"
+  upsert_env "WHISPER_SEGMENT_MS" "${WHISPER_SEGMENT_MS:-4000}"
+
+  echo "Running Whisper smoke test"
+  npm run test-speech
+}
+
+build_app() {  # Run the platform build script through npm when DO_BUILD is 1; otherwise do nothing.
+  if [[ "$DO_BUILD" -eq 1 ]]; then
+    echo "Building app for $OS_NAME with npm run $PLATFORM_BUILD_SCRIPT"
+    npm run "$PLATFORM_BUILD_SCRIPT"  # script name chosen in detect_os
+  fi
+}
+
+run_app() {  # Start the app with npm start when DO_RUN is 1; otherwise report that the run was skipped.
+  if [[ "$DO_RUN" -eq 1 ]]; then  # --no-run sets 0, --run sets 1 (usage lists --run as the default)
+    echo "Starting app"
+    npm start
+  else
+    echo "Setup complete. Skipping run."
+  fi
+}
+
+detect_os  # must run first: later steps read OS_NAME, PYTHON_BIN and the Whisper paths it sets
+echo "Detected OS: $OS_NAME"
+require_command node "Node.js 18+ is required."  # checks presence only, not the version
+require_command npm "npm is required."
+echo "Node: $(node -v)"
+echo "npm:  $(npm -v)"
+
+ensure_env_file  # .env must exist before any step that greps or rewrites it
+ensure_gemini_key
+install_system_deps
+install_node_deps
+setup_whisper_env  # runs "npm run test-speech", so it follows install_node_deps
+build_app
+run_app
diff --git a/src/core/config.js b/src/core/config.js
index 12dfff1..35f7ca5 100644
--- a/src/core/config.js
+++ b/src/core/config.js
@@ -55,11 +55,17 @@ class ConfigManager {
       },
 
       speech: {
+        provider: 'azure',
         azure: {
           language: 'en-US',
           enableDictation: true,
           enableAudioLogging: false,
           outputFormat: 'detailed'
+        },
+        whisper: {
+          model: 'base',
+          language: 'en',
+          segmentMs: 4000
         }
       },
 
@@ -98,4 +104,4 @@ class ConfigManager {
   }
 }
 
-module.exports = new ConfigManager();
\ No newline at end of file
+module.exports = new ConfigManager();
diff --git a/src/managers/window.manager.js b/src/managers/window.manager.js
index 6e0b78b..8c37825 100644
--- a/src/managers/window.manager.js
+++ b/src/managers/window.manager.js
@@ -1642,4 +1642,4 @@ class WindowManager {
     }
 }
 
-module.exports = new WindowManager();
\ No newline at end of file
+module.exports = new WindowManager();
diff --git a/src/services/speech.service.js b/src/services/speech.service.js
index f371603..45875f5 100644
--- a/src/services/speech.service.js
+++ b/src/services/speech.service.js
@@ -1,7 +1,7 @@
 // Enhanced polyfills for Azure Speech SDK in Node.js environment
 if (typeof window === 'undefined') {
   global.window = {
-    navigator: { 
+    navigator: {
       userAgent: 'Node.js',
       platform: 'node',
       mediaDevices: {
@@ -30,7 +30,7 @@ if (typeof window === 'undefined') {
         ])
       }
     },
-    document: { 
+    document: {
       createElement: (tagName) => {
         const element = {
           addEventListener: () => {},
@@ -51,8 +51,7 @@ if (typeof window === 'undefined') {
           focus: () => {},
           blur: () => {}
         };
-        
-        // Special handling for audio elements
+
         if (tagName.toLowerCase() === 'audio') {
           Object.assign(element, {
             play: () => Promise.resolve(),
@@ -78,7 +77,7 @@ if (typeof window === 'undefined') {
             currentSrc: ''
           });
         }
-        
+
         return element;
       },
       getElementById: () => null,
@@ -99,7 +98,7 @@ if (typeof window === 'undefined') {
         style: {}
       }
     },
-    location: { 
+    location: {
       href: 'file:///',
       protocol: 'file:',
       host: '',
@@ -118,7 +117,6 @@ if (typeof window === 'undefined') {
     clearInterval: global.clearInterval,
     requestAnimationFrame: (callback) => global.setTimeout(callback, 16),
     cancelAnimationFrame: global.clearTimeout,
-    // Add console methods if not available
     console: global.console || {
       log: () => {},
       error: () => {},
@@ -127,50 +125,50 @@ if (typeof window === 'undefined') {
       debug: () => {}
     },
     AudioContext: class AudioContext {
-      constructor() { 
-        this.state = 'running'; 
+      constructor() {
+        this.state = 'running';
         this.sampleRate = 16000;
         this.currentTime = 0;
         this.listener = {
           setPosition: () => {},
           setOrientation: () => {}
         };
-        this.destination = { 
-          connect: () => {}, 
+        this.destination = {
+          connect: () => {},
           disconnect: () => {},
           channelCount: 2,
           channelCountMode: 'explicit',
           channelInterpretation: 'speakers'
         };
       }
-      createMediaStreamSource(stream) { 
-        return { 
-          connect: () => {}, 
+      createMediaStreamSource(stream) {
+        return {
+          connect: () => {},
           disconnect: () => {},
           mediaStream: stream
-        }; 
+        };
       }
-      createGain() { 
-        return { 
-          connect: () => {}, 
-          disconnect: () => {}, 
-          gain: { 
+      createGain() {
+        return {
+          connect: () => {},
+          disconnect: () => {},
+          gain: {
             value: 1,
             setValueAtTime: () => {},
             linearRampToValueAtTime: () => {},
             exponentialRampToValueAtTime: () => {}
           }
-        }; 
+        };
       }
-      createScriptProcessor(bufferSize = 4096, inputChannels = 1, outputChannels = 1) { 
-        return { 
-          connect: () => {}, 
-          disconnect: () => {}, 
+      createScriptProcessor(bufferSize = 4096, inputChannels = 1, outputChannels = 1) {
+        return {
+          connect: () => {},
+          disconnect: () => {},
           onaudioprocess: null,
           bufferSize,
           numberOfInputs: inputChannels,
           numberOfOutputs: outputChannels
-        }; 
+        };
       }
       createAnalyser() {
         return {
@@ -187,7 +185,7 @@ if (typeof window === 'undefined') {
           getFloatTimeDomainData: () => {}
         };
       }
-      decodeAudioData(audioData) {
+      decodeAudioData() {
         return Promise.resolve({
           length: 44100,
           sampleRate: 44100,
@@ -196,64 +194,64 @@ if (typeof window === 'undefined') {
           getChannelData: () => new Float32Array(44100)
         });
       }
-      suspend() { 
+      suspend() {
         this.state = 'suspended';
-        return Promise.resolve(); 
+        return Promise.resolve();
       }
-      resume() { 
+      resume() {
         this.state = 'running';
-        return Promise.resolve(); 
+        return Promise.resolve();
       }
-      close() { 
+      close() {
         this.state = 'closed';
-        return Promise.resolve(); 
+        return Promise.resolve();
       }
     },
     webkitAudioContext: class webkitAudioContext {
-      constructor() { 
-        this.state = 'running'; 
+      constructor() {
+        this.state = 'running';
         this.sampleRate = 16000;
         this.currentTime = 0;
         this.listener = {
           setPosition: () => {},
           setOrientation: () => {}
         };
-        this.destination = { 
-          connect: () => {}, 
+        this.destination = {
+          connect: () => {},
           disconnect: () => {},
           channelCount: 2,
           channelCountMode: 'explicit',
           channelInterpretation: 'speakers'
         };
       }
-      createMediaStreamSource(stream) { 
-        return { 
-          connect: () => {}, 
+      createMediaStreamSource(stream) {
+        return {
+          connect: () => {},
           disconnect: () => {},
           mediaStream: stream
-        }; 
+        };
       }
-      createGain() { 
-        return { 
-          connect: () => {}, 
-          disconnect: () => {}, 
-          gain: { 
+      createGain() {
+        return {
+          connect: () => {},
+          disconnect: () => {},
+          gain: {
             value: 1,
             setValueAtTime: () => {},
             linearRampToValueAtTime: () => {},
             exponentialRampToValueAtTime: () => {}
           }
-        }; 
+        };
       }
-      createScriptProcessor(bufferSize = 4096, inputChannels = 1, outputChannels = 1) { 
-        return { 
-          connect: () => {}, 
-          disconnect: () => {}, 
+      createScriptProcessor(bufferSize = 4096, inputChannels = 1, outputChannels = 1) {
+        return {
+          connect: () => {},
+          disconnect: () => {},
           onaudioprocess: null,
           bufferSize,
           numberOfInputs: inputChannels,
           numberOfOutputs: outputChannels
-        }; 
+        };
       }
       createAnalyser() {
         return {
@@ -270,7 +268,7 @@ if (typeof window === 'undefined') {
           getFloatTimeDomainData: () => {}
         };
       }
-      decodeAudioData(audioData) {
+      decodeAudioData() {
         return Promise.resolve({
           length: 44100,
           sampleRate: 44100,
@@ -279,22 +277,21 @@ if (typeof window === 'undefined') {
           getChannelData: () => new Float32Array(44100)
         });
       }
-      suspend() { 
+      suspend() {
         this.state = 'suspended';
-        return Promise.resolve(); 
+        return Promise.resolve();
       }
-      resume() { 
+      resume() {
         this.state = 'running';
-        return Promise.resolve(); 
+        return Promise.resolve();
       }
-      close() { 
+      close() {
         this.state = 'closed';
-        return Promise.resolve(); 
+        return Promise.resolve();
       }
     },
-    // Add additional globals that might be needed
     URL: class URL {
-      constructor(url, base) {
+      constructor(url) {
         this.href = url;
         this.protocol = 'https:';
         this.host = 'localhost';
@@ -305,7 +302,9 @@ if (typeof window === 'undefined') {
         this.hash = '';
         this.origin = 'https://localhost';
       }
-      toString() { return this.href; }
+      toString() {
+        return this.href;
+      }
     },
     Blob: class Blob {
       constructor(parts = [], options = {}) {
@@ -313,10 +312,18 @@ if (typeof window === 'undefined') {
         this.type = options.type || '';
         this.parts = parts;
       }
-      slice() { return new Blob(); }
-      stream() { return new ReadableStream(); }
-      text() { return Promise.resolve(''); }
-      arrayBuffer() { return Promise.resolve(new ArrayBuffer(0)); }
+      slice() {
+        return new Blob();
+      }
+      stream() {
+        return new ReadableStream();
+      }
+      text() {
+        return Promise.resolve('');
+      }
+      arrayBuffer() {
+        return Promise.resolve(new ArrayBuffer(0));
+      }
     },
     File: class File {
       constructor(parts, name, options = {}) {
@@ -326,10 +333,18 @@ if (typeof window === 'undefined') {
         this.lastModified = Date.now();
         this.parts = parts;
       }
-      slice() { return new File([], this.name); }
-      stream() { return new ReadableStream(); }
-      text() { return Promise.resolve(''); }
-      arrayBuffer() { return Promise.resolve(new ArrayBuffer(0)); }
+      slice() {
+        return new File([], this.name);
+      }
+      stream() {
+        return new ReadableStream();
+      }
+      text() {
+        return Promise.resolve('');
+      }
+      arrayBuffer() {
+        return Promise.resolve(new ArrayBuffer(0));
+      }
     }
   };
   global.document = global.window.document;
@@ -339,8 +354,7 @@ if (typeof window === 'undefined') {
   global.URL = global.window.URL;
   global.Blob = global.window.Blob;
   global.File = global.window.File;
-  
-  // Additional polyfills that might be needed
+
   if (!global.performance) {
     global.performance = {
       now: () => Date.now(),
@@ -352,7 +366,7 @@ if (typeof window === 'undefined') {
       getEntriesByType: () => []
     };
   }
-  
+
   if (!global.crypto) {
     global.crypto = {
       getRandomValues: (arr) => {
@@ -365,12 +379,28 @@ if (typeof window === 'undefined') {
   }
 }
 
-const sdk = require('microsoft-cognitiveservices-speech-sdk');
-const recorder = require('node-record-lpcm16');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const { spawn, spawnSync } = require('child_process');
 const { EventEmitter } = require('events');
 const logger = require('../core/logger').createServiceLogger('SPEECH');
 const config = require('../core/config');
 
+let sdk = null;
+try {
+  sdk = require('microsoft-cognitiveservices-speech-sdk');
+} catch (error) {
+  logger.warn('Azure Speech SDK unavailable', { error: error.message });
+}
+
+let recorder = null;
+try {
+  recorder = require('node-record-lpcm16');
+} catch (error) {
+  logger.warn('Local audio recorder dependency unavailable', { error: error.message });
+}
+
 class SpeechService extends EventEmitter {
   constructor() {
     super();
@@ -383,72 +413,133 @@ class SpeechService extends EventEmitter {
     this.maxRetries = 3;
     this.pushStream = null;
     this.recording = null;
-    this.available = false; // track availability
-    
+    this.available = false;
+    this.provider = 'disabled';
+    this.runtimeSettings = {};
+    this.segmentBuffers = [];
+    this.segmentBytes = 0;
+    this.segmentTimer = null;
+    this.transcriptionInFlight = false;
+    this.pendingFlush = false;
+    this.audioProgram = null;
+    this.whisperCommand = null;
+
     this.initializeClient();
   }
 
   initializeClient() {
+    this._cleanup(); // defined outside this hunk; presumably releases state left by a previous provider
+    this.provider = 'disabled'; // reset to a known "off" state before selecting a provider
+    this.available = false;
+    this.speechConfig = null;
+    this.whisperCommand = null;
+
+    const provider = this._getConfiguredProvider(); // defined outside this hunk; only 'azure' and 'whisper' are handled below
+    this.provider = provider;
+
+    if (provider === 'azure') {
+      this._initializeAzureClient(); // sets this.available itself on success
+      return;
+    }
+
+    if (provider === 'whisper') {
+      this._initializeWhisperClient(); // sets this.available itself on success
+      return;
+    }
+
+    const reason = 'Speech recognition disabled. Configure Azure or local Whisper.'; // any other provider value leaves the service unavailable
+    logger.warn(reason);
+    this.emit('status', reason);
+  }
+
+  _initializeAzureClient() { // Builds this.speechConfig for Azure and sets this.available; failures are logged and emitted as 'status', not thrown.
     try {
-      // Get Azure Speech credentials from environment variables
-      const subscriptionKey = process.env.AZURE_SPEECH_KEY;
-      const region = process.env.AZURE_SPEECH_REGION;
-      
+      if (!sdk) { // module-level require of the Azure SDK failed
+        throw new Error('Azure Speech SDK dependency is not installed');
+      }
+
+      if (!recorder || typeof recorder.record !== 'function') { // module-level require of the recorder failed or has no record()
+        throw new Error('Local microphone recorder dependency is not installed');
+      }
+
+      const subscriptionKey = this._getSetting('azureKey') || process.env.AZURE_SPEECH_KEY; // _getSetting result wins over the env var when truthy
+      const region = this._getSetting('azureRegion') || process.env.AZURE_SPEECH_REGION;
+
       if (!subscriptionKey || !region) {
         const reason = 'Azure Speech credentials not found. Speech recognition disabled.';
-        logger.warn('Speech service disabled (missing credentials)');
-        this.available = false;
+        logger.warn('Speech service disabled (missing Azure credentials)'); // this.available is not assigned on this path
         this.emit('status', reason);
         return;
       }
 
-      // Validate region format
-      const validRegions = ['eastus', 'westus', 'westus2', 'eastus2', 'centralus', 'northcentralus', 'southcentralus', 'westcentralus', 'canadacentral', 'canadaeast', 'brazilsouth', 'northeurope', 'westeurope', 'uksouth', 'ukwest', 'francecentral', 'germanywestcentral', 'norwayeast', 'switzerlandnorth', 'switzerlandwest', 'swedencentral', 'uaenorth', 'southafricanorth', 'centralindia', 'southindia', 'westindia', 'eastasia', 'southeastasia', 'japaneast', 'japanwest', 'koreacentral', 'koreasouth', 'australiaeast', 'australiasoutheast'];
-      
-      if (!validRegions.includes(region.toLowerCase())) {
-        logger.warn('Potentially invalid Azure region specified', { region });
-      }
-
-      // Initialize Azure Speech configuration
       this.speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, region);
-      
-      // Configure speech recognition settings with better defaults
+
       const azureConfig = config.get('speech.azure') || {};
       this.speechConfig.speechRecognitionLanguage = azureConfig.language || 'en-US';
       this.speechConfig.outputFormat = sdk.OutputFormat.Detailed;
-      
-      // Set additional properties for better recognition
-      this.speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "5000");
-      this.speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, "2000");
-      this.speechConfig.setProperty(sdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, "2000");
-      
+      this.speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, '5000'); // silence timeouts are passed as strings
+      this.speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, '2000');
+      this.speechConfig.setProperty(sdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, '2000');
+
       if (azureConfig.enableDictation) {
         this.speechConfig.enableDictation();
       }
-      
+
       if (azureConfig.enableAudioLogging) {
         this.speechConfig.enableAudioLogging();
       }
-      
+
+      this.available = true; // flagged before the log and the 'status' emit
       logger.info('Azure Speech service initialized successfully', {
         region,
         language: azureConfig.language || 'en-US'
       });
-      
-      this.available = true;
       this.emit('status', 'Azure Speech Services ready');
-      
     } catch (error) {
-      logger.error('Failed to initialize Azure Speech client', { error: error.message, stack: error.stack });
+      logger.error('Failed to initialize Azure Speech client', {
+        error: error.message,
+        stack: error.stack
+      });
+      this.available = false; // any exception above leaves the provider unavailable
+      this.emit('status', 'Azure speech unavailable');
+    }
+  }
+
+  _initializeWhisperClient() { // Resolves the Whisper command and sets this.available; failures are logged and emitted as 'status', not thrown.
+    try {
+      if (!recorder || typeof recorder.record !== 'function') { // module-level require of the recorder failed or has no record()
+        throw new Error('Local microphone recorder dependency is not installed');
+      }
+
+      this.whisperCommand = this._resolveWhisperCommand(); // defined outside this hunk; a falsy result means no usable CLI
+      if (!this.whisperCommand) {
+        const reason = 'Local Whisper unavailable. Install the Whisper CLI or set WHISPER_COMMAND.';
+        logger.warn(reason); // this.available is not assigned on this path
+        this.emit('status', reason);
+        return;
+      }
+
+      this.available = true;
+      logger.info('Local Whisper service initialized successfully', {
+        command: [this.whisperCommand.command, ...this.whisperCommand.baseArgs].join(' '), // expects { command, baseArgs } with an iterable baseArgs
+        model: this._getWhisperModel(),
+        language: this._getWhisperLanguage()
+      });
+      this.emit('status', 'Local Whisper ready');
+    } catch (error) {
+      logger.error('Failed to initialize local Whisper client', {
+        error: error.message,
+        stack: error.stack
+      });
       this.available = false;
-      this.emit('status', 'Speech recognition unavailable');
+      this.emit('status', 'Local Whisper unavailable');
     }
   }
 
   startRecording() {
     try {
-      if (!this.speechConfig) {
-        const errorMsg = 'Azure Speech client not initialized';
+      if (!this.available) {
+        const errorMsg = `Speech provider "${this.provider}" is not available`;
         logger.error(errorMsg);
         this.emit('error', errorMsg);
         return;
@@ -462,7 +553,17 @@ class SpeechService extends EventEmitter {
       this.sessionStartTime = Date.now();
       this.retryCount = 0;
 
-      this._attemptRecording();
+      if (this.provider === 'azure') {
+        this._startAzureRecording();
+        return;
+      }
+
+      if (this.provider === 'whisper') {
+        this._startWhisperRecording();
+        return;
+      }
+
+      throw new Error(`Unsupported speech provider: ${this.provider}`);
     } catch (error) {
       logger.error('Critical error in startRecording', { error: error.message, stack: error.stack });
       this.emit('error', `Speech recognition failed to start: ${error.message}`);
@@ -470,182 +571,125 @@ class SpeechService extends EventEmitter {
     }
   }
 
-  _attemptRecording() {
+  _startAzureRecording() {
+    if (!this.speechConfig) {
+      throw new Error('Azure Speech client not initialized');
+    }
+
+    this.isRecording = true;
+    this.emit('recording-started');
+    this.emit('status', 'Azure recording started');
+    this._cleanup();
+
     try {
-      this.isRecording = true;
-      this.emit('recording-started');
-
-      // Clean up any existing resources
-      this._cleanup();
-
-             // Use push stream with Node.js audio capture (more reliable for Electron main process)
-       try {
-         this.pushStream = sdk.AudioInputStream.createPushStream();
-         this.audioConfig = sdk.AudioConfig.fromStreamInput(this.pushStream);
-         
-         // Start capturing real microphone audio
-         this._startMicrophoneCapture();
-         
-       } catch (audioError) {
-         logger.error('Failed to create audio config', { error: audioError.message });
-         this.emit('error', 'Audio configuration failed. Please check microphone permissions.');
-         this.isRecording = false;
-         return;
-       }
-             
-       // Create speech recognizer
-       try {
-         this.recognizer = new sdk.SpeechRecognizer(this.speechConfig, this.audioConfig);
-       } catch (recognizerError) {
-         throw recognizerError;
-       }
-
-             // Set up event handlers with better error handling
-       this.recognizer.recognizing = (s, e) => {
-         try {
-           if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
-             logger.debug('Interim transcription received', { 
-               text: e.result.text,
-               offset: e.result.offset,
-               duration: e.result.duration
-             });
-             this.emit('interim-transcription', e.result.text);
-           }
-         } catch (error) {
-           logger.error('Error in recognizing handler', { error: error.message });
-         }
-       };
-
-       this.recognizer.recognized = (s, e) => {
-         try {
-           if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
-             const sessionDuration = Date.now() - this.sessionStartTime;
-             
-             // Only emit transcription if there's actual text content
-             if (e.result.text && e.result.text.trim().length > 0) {
-               logger.info('Final transcription received', {
-                 text: e.result.text,
-                 sessionDuration: `${sessionDuration}ms`,
-                 textLength: e.result.text.length,
-                 confidence: e.result.properties?.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult)
-               });
-               
-               this.emit('transcription', e.result.text);
-             } else {
-               logger.debug('Empty transcription result ignored', {
-                 sessionDuration: `${sessionDuration}ms`,
-                 confidence: e.result.properties?.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult)
-               });
-             }
-           } else if (e.result.reason === sdk.ResultReason.NoMatch) {
-             logger.debug('No speech pattern detected in audio');
-             
-             // Check if there's detailed no-match information
-             const noMatchDetails = e.result.properties?.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult);
-             if (noMatchDetails) {
-               logger.debug('No match details', { details: noMatchDetails });
-             }
-           }
-         } catch (error) {
-           logger.error('Error in recognized handler', { error: error.message });
-         }
-       };
-
-      this.recognizer.canceled = (s, e) => {
-        logger.warn('Recognition session canceled', { 
-          reason: e.reason,
-          errorCode: e.errorCode,
-          errorDetails: e.errorDetails 
-        });
-        
-        if (e.reason === sdk.CancellationReason.Error) {
-          const errorMsg = `Recognition error: ${e.errorDetails}`;
-          
-          // Check for specific error types and provide better messages
-          if (e.errorDetails.includes('1006')) {
-            this.emit('error', 'Network connection failed. Please check your internet connection.');
-          } else if (e.errorDetails.includes('InvalidServiceCredentials')) {
-            this.emit('error', 'Invalid Azure Speech credentials. Please check AZURE_SPEECH_KEY and AZURE_SPEECH_REGION.');
-          } else if (e.errorDetails.includes('Forbidden')) {
-            this.emit('error', 'Access denied. Please check your Azure Speech service subscription and region.');
-          } else if (e.errorDetails.includes('AudioInputMicrophone_InitializationFailure')) {
-            this.emit('error', 'Microphone initialization failed. Please check microphone permissions and availability.');
-          } else {
-            this.emit('error', errorMsg);
-          }
-          
-          // Attempt retry for transient errors
-          if (this.retryCount < this.maxRetries && (
-            e.errorDetails.includes('1006') || 
-            e.errorDetails.includes('timeout') || 
-            e.errorDetails.includes('network')
-          )) {
-            this.retryCount++;
-            logger.info(`Retrying recognition (attempt ${this.retryCount}/${this.maxRetries})`);
-            setTimeout(() => {
-              if (!this.isRecording) {
-                this._attemptRecording();
-              }
-            }, 1000 * this.retryCount);
-            return;
-          }
+      this.pushStream = sdk.AudioInputStream.createPushStream();
+      this.audioConfig = sdk.AudioConfig.fromStreamInput(this.pushStream);
+      this._startMicrophoneCapture();
+      this.recognizer = new sdk.SpeechRecognizer(this.speechConfig, this.audioConfig);
+    } catch (error) {
+      logger.error('Failed to start Azure recording session', { error: error.message });
+      this.emit('error', `Audio configuration failed: ${error.message}`);
+      this.isRecording = false;
+      return;
+    }
+
+    this.recognizer.recognizing = (s, e) => {
+      try {
+        if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
+          this.emit('interim-transcription', e.result.text);
         }
-        this.stopRecording();
-      };
+      } catch (error) {
+        logger.error('Error in recognizing handler', { error: error.message });
+      }
+    };
 
-      this.recognizer.sessionStarted = (s, e) => {
-        logger.info('Recognition session started', { sessionId: e.sessionId });
-      };
+    this.recognizer.recognized = (s, e) => {
+      try {
+        if (e.result.reason === sdk.ResultReason.RecognizedSpeech && e.result.text && e.result.text.trim()) {
+          this.emit('transcription', e.result.text);
+        }
+      } catch (error) {
+        logger.error('Error in recognized handler', { error: error.message });
+      }
+    };
 
-      this.recognizer.sessionStopped = (s, e) => {
-        logger.info('Recognition session ended', { sessionId: e.sessionId });
-        this.stopRecording();
-      };
+    this.recognizer.canceled = (s, e) => {
+      logger.warn('Recognition session canceled', {
+        reason: e.reason,
+        errorCode: e.errorCode,
+        errorDetails: e.errorDetails
+      });
 
-       // Start continuous recognition with timeout
-       const startTimeout = setTimeout(() => {
-         logger.error('Recognition start timeout');
-         this.emit('error', 'Speech recognition start timeout. Please try again.');
-         this.stopRecording();
-       }, 10000); // 10 second timeout
-
-       this.recognizer.startContinuousRecognitionAsync(
-         () => {
-           clearTimeout(startTimeout);
-           logger.info('Continuous speech recognition started successfully');
-           if (global.windowManager) {
-             global.windowManager.handleRecordingStarted();
-           }
-         },
-         (error) => {
-           clearTimeout(startTimeout);
-           logger.error('Failed to start continuous recognition', { 
-             error: error.toString(),
-             retryCount: this.retryCount 
-           });
-           
-           // Attempt retry for initialization failures
-           if (this.retryCount < this.maxRetries) {
-             this.retryCount++;
-             logger.info(`Retrying recognition start (attempt ${this.retryCount}/${this.maxRetries})`);
-             this.isRecording = false;
-             setTimeout(() => {
-               this._attemptRecording();
-             }, 2000 * this.retryCount);
-           } else {
-             this.emit('error', `Recognition startup failed after ${this.maxRetries} attempts: ${error}`);
-             this.isRecording = false;
-           }
-         }
-       );
+      if (e.reason === sdk.CancellationReason.Error) {
+        const details = e.errorDetails || '';
+        if (details.includes('1006')) {
+          this.emit('error', 'Network connection failed. Please check your internet connection.');
+        } else if (details.includes('InvalidServiceCredentials')) {
+          this.emit('error', 'Invalid Azure Speech credentials. Please check AZURE_SPEECH_KEY and AZURE_SPEECH_REGION.');
+        } else if (details.includes('Forbidden')) {
+          this.emit('error', 'Access denied. Please check your Azure Speech service subscription and region.');
+        } else if (details.includes('AudioInputMicrophone_InitializationFailure')) {
+          this.emit('error', 'Microphone initialization failed. Please check microphone permissions and availability.');
+        } else {
+          this.emit('error', `Recognition error: ${details}`);
+        }
+      }
 
-    } catch (error) {
-      logger.error('Failed to start recording session', { 
-        error: error.message, 
-        stack: error.stack 
+      this.stopRecording();
+    };
+
+    this.recognizer.sessionStarted = (s, e) => {
+      logger.info('Recognition session started', { sessionId: e.sessionId });
+    };
+
+    this.recognizer.sessionStopped = () => {
+      this.stopRecording();
+    };
+
+    const startTimeout = setTimeout(() => {
+      logger.error('Recognition start timeout');
+      this.emit('error', 'Speech recognition start timeout. Please try again.');
+      this.stopRecording();
+    }, 10000);
+
+    this.recognizer.startContinuousRecognitionAsync(
+      () => {
+        clearTimeout(startTimeout);
+        logger.info('Continuous Azure speech recognition started successfully');
+        if (global.windowManager) {
+          global.windowManager.handleRecordingStarted();
+        }
+      },
+      (error) => {
+        clearTimeout(startTimeout);
+        logger.error('Failed to start continuous recognition', { error: error.toString() });
+        this.emit('error', `Recognition startup failed: ${error}`);
+        this.isRecording = false;
+        this._cleanup();
+      }
+    );
+  }
+
+  _startWhisperRecording() {
+    this._cleanup();
+    this.isRecording = true;
+    this.segmentBuffers = [];
+    this.segmentBytes = 0;
+    this.transcriptionInFlight = false;
+    this.pendingFlush = false;
+    this.emit('recording-started');
+    this.emit('status', 'Local Whisper recording started');
+    this._startMicrophoneCapture();
+
+    const segmentMs = this._getWhisperSegmentMs();
+    this.segmentTimer = setInterval(() => {
+      this._flushWhisperSegment({ final: false }).catch((error) => {
+        logger.error('Whisper segment transcription failed', { error: error.message });
       });
-      this.emit('error', `Recording startup failed: ${error.message}`);
-      this.isRecording = false;
+    }, segmentMs);
+
+    if (global.windowManager) {
+      global.windowManager.handleRecordingStarted();
     }
   }
 
@@ -656,40 +700,77 @@ class SpeechService extends EventEmitter {
 
     this.isRecording = false;
     const sessionDuration = this.sessionStartTime ? Date.now() - this.sessionStartTime : 0;
-    
-    logger.info('Stopping speech recognition session', { 
-      sessionDuration: `${sessionDuration}ms` 
+    logger.info('Stopping speech recognition session', {
+      provider: this.provider,
+      sessionDuration: `${sessionDuration}ms`
     });
 
-    // Stop continuous recognition
-    if (this.recognizer) {
+    if (this.provider === 'azure' && this.recognizer) {
       try {
         this.recognizer.stopContinuousRecognitionAsync(
           () => {
-            logger.info('Speech recognition stopped successfully');
-            this.emit('recording-stopped');
-            this.emit('status', 'Recording stopped');
-            if (global.windowManager) {
-              global.windowManager.handleRecordingStopped();
-            }
-            this._cleanup();
+            this._finalizeStop('Recording stopped');
           },
           (error) => {
             logger.error('Error during recognition stop', { error: error.toString() });
-            this._cleanup();
+            this._finalizeStop('Recording stopped');
           }
         );
       } catch (error) {
         logger.error('Error stopping recognizer', { error: error.message });
-        this._cleanup();
+        this._finalizeStop('Recording stopped');
+      }
+      return;
+    }
+
+    if (this.provider === 'whisper') {
+      this._finalizeWhisperStop();
+      return;
+    }
+
+    this._finalizeStop('Recording stopped');
+  }
+
+  async _finalizeWhisperStop() {
+    if (this.segmentTimer) {
+      clearInterval(this.segmentTimer);
+      this.segmentTimer = null;
+    }
+
+    if (this.recording) {
+      try {
+        this.recording.stop();
+      } catch (error) {
+        logger.error('Error stopping audio recording', { error: error.message });
       }
-    } else {
-      this._cleanup();
+      this.recording = null;
+    }
+
+    try {
+      await this._flushWhisperSegment({ final: true });
+    } catch (error) {
+      logger.error('Final Whisper transcription failed', { error: error.message });
+      this.emit('error', `Whisper transcription failed: ${error.message}`);
+    } finally {
+      this._finalizeStop('Recording stopped');
+    }
+  }
+
+  _finalizeStop(statusMessage) {
+    this._cleanup();
+    this.emit('recording-stopped');
+    this.emit('status', statusMessage);
+    if (global.windowManager) {
+      global.windowManager.handleRecordingStopped();
     }
   }
 
   _cleanup() {
-    // Clean up recognizer
+    if (this.segmentTimer) {
+      clearInterval(this.segmentTimer);
+      this.segmentTimer = null;
+    }
+
     if (this.recognizer) {
       try {
         this.recognizer.close();
@@ -699,73 +780,50 @@ class SpeechService extends EventEmitter {
       this.recognizer = null;
     }
 
-         // Clean up audio config
-     if (this.audioConfig) {
-       try {
-         // Check if close method exists and call it appropriately
-         if (typeof this.audioConfig.close === 'function') {
-           try {
-             const closeResult = this.audioConfig.close();
-             // If it returns a promise, handle it, otherwise just continue
-             if (closeResult && typeof closeResult.then === 'function') {
-               // It's a promise, but we don't need to wait for it in cleanup
-               closeResult.catch((error) => {
-                logger.error('Error closing audio config', { error: error.message });
-               });
-             }
-           } catch (closeError) {
-            logger.error('Error closing audio config', { error: closeError.message });
-           }
-         }
-       } catch (error) {
-         logger.error('Error closing audio config', { error: error.message });
-       }
-       this.audioConfig = null;
-     }
-
-     // Stop audio recording
-     if (this.recording) {
-       try {
-         this.recording.stop();
-         this.recording = null;
-       } catch (error) {
-         logger.error('Error stopping audio recording', { error: error.message });
-       }
-     }
-
-     // Clean up push stream
-     if (this.pushStream) {
-       try {
-         // Check if close method exists and call it appropriately
-         if (typeof this.pushStream.close === 'function') {
-           const closeResult = this.pushStream.close();
-           // If it returns a promise, we can await it, otherwise just continue
-           if (closeResult && typeof closeResult.then === 'function') {
-             // It's a promise, but we don't need to wait for it in cleanup
-             closeResult.catch((error) => {
-             });
-           }
-         }
-       } catch (error) {
-         logger.error('Error closing push stream', { error: error.message });
-       }
-       this.pushStream = null;
-     }
-
-     // Reset audio data logging flag
-     this._audioDataLogged = false;
+    if (this.audioConfig) {
+      try {
+        if (typeof this.audioConfig.close === 'function') {
+          this.audioConfig.close();
+        }
+      } catch (error) {
+        logger.error('Error closing audio config', { error: error.message });
+      }
+      this.audioConfig = null;
+    }
+
+    if (this.recording) {
+      try {
+        this.recording.stop();
+      } catch (error) {
+        logger.error('Error stopping audio recording', { error: error.message });
+      }
+      this.recording = null;
+    }
+
+    if (this.pushStream) {
+      try {
+        if (typeof this.pushStream.close === 'function') {
+          this.pushStream.close();
+        }
+      } catch (error) {
+        logger.error('Error closing push stream', { error: error.message });
+      }
+      this.pushStream = null;
+    }
+
+    this.segmentBuffers = [];
+    this.segmentBytes = 0;
+    this.transcriptionInFlight = false;
+    this.pendingFlush = false;
+    this._audioDataLogged = false;
   }
 
   async recognizeFromFile(audioFilePath) {
-    if (!this.speechConfig) {
-      throw new Error('Speech service not initialized');
-    }
+    if (this.provider === 'azure') {
+      if (!this.speechConfig) {
+        throw new Error('Speech service not initialized');
+      }
 
-    const startTime = Date.now();
-    
-    try {
-      // Validate file exists and is readable
-      const fs = require('fs');
       if (!fs.existsSync(audioFilePath)) {
         throw new Error(`Audio file not found: ${audioFilePath}`);
       }
@@ -773,206 +831,438 @@ class SpeechService extends EventEmitter {
       const audioConfig = sdk.AudioConfig.fromWavFileInput(audioFilePath);
       const recognizer = new sdk.SpeechRecognizer(this.speechConfig, audioConfig);
 
-      const result = await new Promise((resolve, reject) => {
-        const timeout = setTimeout(() => {
-          reject(new Error('File recognition timeout'));
-          recognizer.close();
-        }, 30000); // 30 second timeout
-
+      return await new Promise((resolve, reject) => {
         recognizer.recognizeOnceAsync(
           (result) => {
-            clearTimeout(timeout);
-            if (result.reason === sdk.ResultReason.RecognizedSpeech) {
-              resolve(result.text);
-            } else if (result.reason === sdk.ResultReason.NoMatch) {
-              resolve(''); // No speech detected in file
-            } else {
-              reject(new Error(`File recognition failed: ${result.reason}`));
-            }
+            resolve(result.reason === sdk.ResultReason.RecognizedSpeech ? result.text : '');
             recognizer.close();
             audioConfig.close();
           },
           (error) => {
-            clearTimeout(timeout);
             reject(new Error(`File recognition error: ${error}`));
             recognizer.close();
             audioConfig.close();
           }
         );
       });
+    }
 
-      logger.logPerformance('File speech recognition', startTime, {
-        filePath: audioFilePath,
-        textLength: result.length
-      });
+    if (this.provider === 'whisper') {
+      return this._transcribeWhisperFile(audioFilePath);
+    }
 
-      return result;
-    } catch (error) {
-      logger.error('File recognition failed', { 
-        filePath: audioFilePath, 
-        error: error.message 
-      });
-      throw error;
+    throw new Error('Speech service not initialized');
+  }
+
+  async testConnection() {
+    if (this.provider === 'azure') {
+      if (!this.speechConfig) {
+        throw new Error('Speech service not initialized');
+      }
+
+      try {
+        const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
+        const recognizer = new sdk.SpeechRecognizer(this.speechConfig, audioConfig);
+        recognizer.close();
+        audioConfig.close();
+        return { success: true, message: 'Azure connection test successful' };
+      } catch (error) {
+        return { success: false, message: error.message };
+      }
     }
+
+    if (this.provider === 'whisper') {
+      return {
+        success: !!this.whisperCommand,
+        message: this.whisperCommand ? 'Local Whisper CLI detected' : 'Local Whisper CLI not found'
+      };
+    }
+
+    return { success: false, message: 'Speech service not initialized' };
   }
 
   getStatus() {
     return {
+      provider: this.provider,
       isRecording: this.isRecording,
-      isInitialized: !!this.speechConfig,
+      isInitialized: this.provider === 'azure' ? !!this.speechConfig : !!this.whisperCommand,
       sessionDuration: this.sessionStartTime ? Date.now() - this.sessionStartTime : 0,
       retryCount: this.retryCount,
-      config: config.get('speech.azure') || {}
+      effectiveSettings: {
+        speechProvider: this.provider,
+        azureKey: this._getSetting('azureKey') || '',
+        azureRegion: this._getSetting('azureRegion') || process.env.AZURE_SPEECH_REGION || '',
+        whisperCommand: this._getSetting('whisperCommand') || process.env.WHISPER_COMMAND || '',
+        whisperModelDir: this._getWhisperModelDir(),
+        whisperModel: this._getWhisperModel(),
+        whisperLanguage: this._getWhisperLanguage(),
+        whisperSegmentMs: String(this._getWhisperSegmentMs())
+      },
+      config: {
+        azure: config.get('speech.azure') || {},
+        whisper: config.get('speech.whisper') || {},
+        selectedProvider: this.provider
+      }
     };
   }
 
-     // Test connection method
-   async testConnection() {
-     if (!this.speechConfig) {
-       throw new Error('Speech service not initialized');
-     }
-
-     try {
-       // Create a simple test recognizer
-       const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
-       const recognizer = new sdk.SpeechRecognizer(this.speechConfig, audioConfig);
-       
-       // Test by attempting to create the recognizer (this validates credentials)
-       recognizer.close();
-       audioConfig.close();
-       
-       return { success: true, message: 'Connection test successful' };
-     } catch (error) {
-       return { success: false, message: error.message };
-     }
-   }
-
-   // Start capturing real microphone audio using node-record-lpcm16
-   _startMicrophoneCapture() {
-     if (!this.pushStream) return;
-          
-     try {
-       // Check if recorder is available
-       if (!recorder || typeof recorder.record !== 'function') {
-         throw new Error('node-record-lpcm16 not available or not properly installed');
-       }
-
-       // Configure audio recording with error handling
-       this.recording = recorder.record({
-         sampleRateHertz: 16000,  // Azure Speech SDK prefers 16kHz
-         threshold: 0,            // No silence threshold
-         verbose: false,          // Quiet logging
-         recordProgram: 'sox',    // Try 'sox' first (most common on macOS)
-         silence: '10.0s'         // Longer silence threshold
-       });
-
-       if (!this.recording) {
-         throw new Error('Failed to create audio recording instance');
-       }
-
-       // Add error handler for the recording stream before using it
-       this.recording.stream().on('error', (error) => {
-         logger.error('Audio recording stream error', { error: error.message });
-         
-         // Don't emit error immediately, try to recover
-         this._handleAudioError(error);
-       });
-
-       // Pipe audio data to Azure Speech SDK
-       this.recording.stream().on('data', (chunk) => {
-         if (this.pushStream && this.isRecording) {
-           try {
-             this.pushStream.write(chunk);
-             // Console log only first few chunks to avoid spam
-             if (!this._audioDataLogged) {
-               this._audioDataLogged = true;
-             }
-           } catch (error) {
-           }
-         }
-       });
-
-     } catch (error) {
-       logger.error('Failed to start microphone capture', { error: error.message, stack: error.stack });
-       
-       // Fall back to no audio capture (Azure SDK will still work without audio)
-       this.emit('error', `Microphone capture failed: ${error.message}. Speech recognition may not work properly.`);
-     }
-   }
-
-   // Handle audio recording errors with recovery attempts
-   _handleAudioError(error) {
-     
-     // Try to restart recording with different program
-     if (this.recording) {
-       try {
-         this.recording.stop();
-       } catch (stopError) {
-       }
-       this.recording = null;
-     }
-
-     // Try with different recording program
-     setTimeout(() => {
-       if (this.isRecording) {
-         this._startMicrophoneCaptureWithFallback();
-       }
-     }, 1000);
-   }
-
-   // Try microphone capture with different programs as fallback
-   _startMicrophoneCaptureWithFallback() {
-     const programs = ['sox', 'rec', 'arecord'];
-     let currentProgramIndex = 0;
-
-     const tryNextProgram = () => {
-       if (currentProgramIndex >= programs.length) {
-         this.emit('error', 'Could not start microphone capture with any audio program');
-         return;
-       }
-
-       const program = programs[currentProgramIndex];
-
-       try {
-         this.recording = recorder.record({
-           sampleRateHertz: 16000,
-           threshold: 0,
-           verbose: false,
-           recordProgram: program,
-           silence: '10.0s'
-         });
-
-         this.recording.stream().on('error', (error) => {
-           currentProgramIndex++;
-           tryNextProgram();
-         });
-
-         this.recording.stream().on('data', (chunk) => {
-           if (this.pushStream && this.isRecording) {
-             try {
-               this.pushStream.write(chunk);
-               if (!this._audioDataLogged) {
-                 this._audioDataLogged = true;
-               }
-             } catch (error) {
-              logger.error('Error writing audio data', { error: error.message });
-             }
-           }
-         });
-       } catch (error) {
-         logger.error(`${program} configuration failed`, { error: error.message });
-         currentProgramIndex++;
-         tryNextProgram();
-       }
-     };
-
-     tryNextProgram();
-   }
-
-  // Expose availability to UI
   isAvailable() {
-    return !!this.speechConfig && !!this.available;
+    if (this.provider === 'azure') {
+      return !!this.speechConfig && !!this.available;
+    }
+
+    if (this.provider === 'whisper') {
+      return !!this.whisperCommand && !!this.available;
+    }
+
+    return false;
+  }
+
+  updateSettings(settings = {}) {
+    // Stores runtime overrides for the recognised speech keys and re-runs initializeClient() when any
+    // of them was supplied (even with an unchanged value). Always returns the getStatus() snapshot.
+    const speechKeys = ['speechProvider', 'azureKey', 'azureRegion', 'whisperCommand', 'whisperModelDir', 'whisperModel', 'whisperLanguage', 'whisperSegmentMs'];
+    const incoming = settings ?? {}; // the `= {}` default only covers undefined; an explicit null must not throw
+    const supplied = speechKeys.filter((key) => Object.prototype.hasOwnProperty.call(incoming, key));
+
+    for (const key of supplied) {
+      this.runtimeSettings[key] = incoming[key];
+    }
+
+    if (supplied.length > 0) {
+      this.initializeClient();
+    }
+
+    return this.getStatus();
+  }
+
+  _getConfiguredProvider() {
+    // Explicit choice wins: runtime setting first, then SPEECH_PROVIDER; matching ignores case and padding.
+    const requested = String(this._getSetting('speechProvider') || process.env.SPEECH_PROVIDER || '')
+      .trim()
+      .toLowerCase();
+    if (['azure', 'whisper'].includes(requested)) {
+      return requested;
+    }
+
+    // Otherwise infer: Azure needs both a key and a region; anything less falls back to local Whisper.
+    const azureKey = this._getSetting('azureKey') || process.env.AZURE_SPEECH_KEY;
+    const azureRegion = this._getSetting('azureRegion') || process.env.AZURE_SPEECH_REGION;
+    const hasAzureCredentials = Boolean(azureKey && azureRegion);
+
+    return hasAzureCredentials ? 'azure' : 'whisper';
+  }
+
+  _getWhisperModel() { // Whisper model name: runtime setting, else WHISPER_MODEL, else config 'speech.whisper.model', else 'base'.
+    return this._getSetting('whisperModel') || process.env.WHISPER_MODEL || config.get('speech.whisper.model') || 'base';
+  }
+
+  _getWhisperModelDir() { // Whisper model directory: runtime setting, else WHISPER_MODEL_DIR, else '' (no config-file fallback).
+    return this._getSetting('whisperModelDir') || process.env.WHISPER_MODEL_DIR || '';
+  }
+
+  _getWhisperLanguage() { // Whisper language: runtime setting, else WHISPER_LANGUAGE, else config 'speech.whisper.language', else 'en'.
+    return this._getSetting('whisperLanguage') || process.env.WHISPER_LANGUAGE || config.get('speech.whisper.language') || 'en';
+  }
+
+  _getWhisperSegmentMs() {
+    // Precedence: runtime setting, WHISPER_SEGMENT_MS, config, then 4000 ms; unparsable values also yield 4000.
+    const parsed = Number(this._getSetting('whisperSegmentMs') || process.env.WHISPER_SEGMENT_MS || config.get('speech.whisper.segmentMs') || 4000);
+    return Number.isFinite(parsed) ? Math.min(Math.max(2000, parsed), 2147483647) : 4000; // Node timers treat delays > 2^31-1 ms as 1 ms
+  }
+
+  _getSetting(key) {
+    const { [key]: stored } = this.runtimeSettings; // runtime override for `key`; undefined when never set
+    return stored !== '' ? stored : null; // an empty string is reported as null
+  }
+
+  _resolveWhisperCommand() {
+    // Returns the first { command, baseArgs } candidate whose `--help` probe succeeds, or null when none does.
+    const configured = this._getSetting('whisperCommand') || process.env.WHISPER_COMMAND;
+
+    // A user-configured command is tried first, followed by the usual CLI and `python -m whisper` spellings.
+    const candidates = [
+      ...(configured ? this._expandConfiguredWhisperCandidates(configured) : []),
+      { command: 'whisper', baseArgs: [] },
+      { command: 'whisper.exe', baseArgs: [] },
+      { command: 'py', baseArgs: ['-3', '-m', 'whisper'] },
+      { command: 'python3', baseArgs: ['-m', 'whisper'] },
+      { command: 'python', baseArgs: ['-m', 'whisper'] }
+    ];
+
+    const isUsable = ({ command, baseArgs }) => {
+      // Synchronous probe capped at 5 seconds per candidate.
+      const { error, status, stdout, stderr } = spawnSync(command, [...baseArgs, '--help'], {
+        encoding: 'utf8',
+        timeout: 5000
+      });
+      if (error || status !== 0) {
+        return false;
+      }
+
+      // Guards against a zero exit status whose output still reports the whisper module as missing.
+      const output = `${stdout || ''}\n${stderr || ''}`;
+      return !output.includes('No module named whisper');
+    };
+
+    const match = candidates.find((candidate) => candidate && candidate.command && isUsable(candidate));
+    return match || null;
+  }
+
+  _expandConfiguredWhisperCandidates(rawCommand) {
+    // Expands a user-supplied command string into the list of spawn candidates to probe, in priority order.
+    const parsed = this._parseCommand(rawCommand);
+    if (!parsed) {
+      return [];
+    }
+
+    const { command, baseArgs } = parsed;
+    const absolute = path.resolve(command);
+    const withArgs = (name) => ({ command: name, baseArgs });
+
+    // As typed first, then resolved against the working directory when that yields a different string.
+    const candidates = absolute === command ? [parsed] : [parsed, withArgs(absolute)];
+
+    // On Windows a command without an .exe/.cmd/.bat suffix is also tried with .exe and .cmd appended.
+    const needsSuffix = process.platform === 'win32' && !/\.(exe|cmd|bat)$/i.test(command);
+    if (needsSuffix) {
+      for (const base of [command, absolute]) {
+        candidates.push(withArgs(`${base}.exe`), withArgs(`${base}.cmd`));
+      }
+    }
+
+    return candidates;
+  }
+
+  _parseCommand(rawCommand) {
+    // Splits a command line into { command, baseArgs }; returns null when it contains no tokens.
+    // Quoted runs stay intact so an executable path containing spaces can be configured by quoting it.
+    const tokens = String(rawCommand || '').match(/"[^"]*"|'[^']*'|\S+/g) || [];
+    const parts = tokens.map((token) => token.replace(/^(["'])(.*)\1$/, '$2')).filter(Boolean);
+    if (parts.length === 0) {
+      return null;
+    }
+
+    return { command: parts[0], baseArgs: parts.slice(1) };
+  }
+
+  _startMicrophoneCapture() {
+    // Starts capture via the sox -> rec -> arecord fallback chain, or emits 'error' if the recorder module is unusable.
+    if (recorder && typeof recorder.record === 'function') {
+      this._startMicrophoneCaptureWithFallback(['sox', 'rec', 'arecord']);
+    } else {
+      this.emit('error', 'Local microphone capture dependency is missing. Run npm install to restore speech recording support.');
+    }
+  }
+
+  _startMicrophoneCaptureWithFallback(programs) {
+    // Tries each capture program in turn; emits 'error' once every program has failed to start.
+    const queue = [...programs];
+    const describe = (error) => (error && error.message) || String(error); // payload may be a plain string, not an Error
+    const tryNextProgram = () => {
+      const program = queue.shift();
+      if (!program) {
+        this.emit('error', 'Could not start microphone capture with any audio program');
+        return;
+      }
+
+      try {
+        // node-record-lpcm16 1.x picks the backend from `recorder`; `recordProgram` is the older name, kept as well.
+        const recording = recorder.record({
+          sampleRateHertz: 16000,
+          channels: 1,
+          threshold: 0,
+          verbose: false,
+          recorder: program,
+          recordProgram: program,
+          silence: '10.0s'
+        });
+        this.recording = recording;
+        const stream = recording.stream();
+        this.audioProgram = program;
+
+        stream.on('error', (error) => {
+          logger.error('Audio recording stream error', { error: describe(error), program });
+          // Ignore failures from a capture that is no longer current so a newer session is never stopped by mistake.
+          if (this.recording !== recording) {
+            return;
+          }
+          try {
+            recording.stop();
+          } catch (stopError) {
+            logger.error('Error stopping failed recording program', { error: stopError.message });
+          }
+          this.recording = null;
+          if (this.isRecording) {
+            tryNextProgram();
+          }
+        });
+        stream.on('data', (chunk) => this._handleAudioChunk(chunk));
+      } catch (error) {
+        logger.error('Failed to start microphone capture program', { program, error: error.message });
+        tryNextProgram();
+      }
+    };
+    tryNextProgram();
+  }
+
+  _handleAudioChunk(chunk) {
+    // Routes one captured chunk to the active provider; empty chunks and chunks seen while not recording are dropped.
+    const hasAudio = Boolean(chunk) && Boolean(chunk.length);
+    if (!hasAudio || !this.isRecording) {
+      return;
+    }
+
+    const sendToAzure = this.provider === 'azure' && this.pushStream;
+    if (sendToAzure) {
+      try {
+        this.pushStream.write(chunk);
+      } catch (writeError) {
+        logger.error('Error writing audio data to Azure push stream', { error: writeError.message });
+      }
+    } else if (this.provider === 'whisper') {
+      this.segmentBuffers.push(Buffer.from(chunk)); // accumulated until the segment is flushed
+      this.segmentBytes += chunk.length;
+    }
+  }
+
+  async _flushWhisperSegment({ final }) { // Transcribes the buffered Whisper audio and emits 'transcription' for non-empty text.
+    if (this.transcriptionInFlight) {
+      this.pendingFlush = this.pendingFlush || final; // while busy, only a truthy `final` request is remembered
+      return;
+    }
+
+    if (!this.segmentBytes) {
+      return;
+    }
+    // Snapshot the buffered audio and reset the accumulators before the await below.
+    const audioBuffer = Buffer.concat(this.segmentBuffers, this.segmentBytes);
+    this.segmentBuffers = [];
+    this.segmentBytes = 0;
+
+    this.transcriptionInFlight = true;
+    // There is no catch: a transcription failure propagates to the caller (the finally block still runs).
+    try {
+      const transcript = await this._transcribeWhisperBuffer(audioBuffer);
+      if (transcript && transcript.trim()) {
+        this.emit('transcription', transcript.trim());
+      }
+    } finally {
+      this.transcriptionInFlight = false;
+      // Replay a flush that was requested while this transcription was in flight.
+      if (this.pendingFlush) {
+        const shouldRunFinal = this.pendingFlush;
+        this.pendingFlush = false;
+        await this._flushWhisperSegment({ final: shouldRunFinal });
+      }
+    }
+  }
+
+  async _transcribeWhisperBuffer(audioBuffer) {
+    // Writes the audio as segment.wav into a fresh temp directory, transcribes that file, then removes the directory.
+    const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencluely-whisper-'));
+    try {
+      const wavPath = path.join(scratchDir, 'segment.wav');
+      fs.writeFileSync(wavPath, this._createWavBuffer(audioBuffer));
+      return await this._transcribeWhisperFile(wavPath);
+    } finally {
+      this._removeTempDir(scratchDir);
+    }
+  }
+
+  async _transcribeWhisperFile(audioFilePath) { // Runs the Whisper CLI on one audio file; resolves to the trimmed transcript ('' if no .txt appears).
+    if (!this.whisperCommand) {
+      throw new Error('Local Whisper CLI not configured'); // thrown (as a rejection) before anything is created
+    }
+    // The transcript is expected at <outputDir>/<input base name>.txt; outputDir is removed in `finally`.
+    const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'opencluely-whisper-out-'));
+    const args = [
+      ...this.whisperCommand.baseArgs,
+      audioFilePath,
+      '--model', this._getWhisperModel(),
+      '--language', this._getWhisperLanguage(),
+      '--task', 'transcribe',
+      '--output_format', 'txt',
+      '--output_dir', outputDir,
+      '--verbose', 'False',
+      '--fp16', 'False'
+    ];
+
+    if (this._getWhisperModelDir()) {
+      args.push('--model_dir', this._getWhisperModelDir());
+    }
+
+    try {
+      await new Promise((resolve, reject) => {
+        const child = spawn(this.whisperCommand.command, args, {
+          stdio: ['ignore', 'ignore', 'pipe'] // stdout is never read here; an unread pipe could fill up and stall the child
+        });
+        // stderr is collected only to build the rejection message for a non-zero exit.
+        let stderr = '';
+        child.stderr.on('data', (chunk) => {
+          stderr += chunk.toString();
+        });
+
+        child.on('error', (error) => {
+          reject(error); // e.g. the executable could not be spawned
+        });
+
+        child.on('close', (code) => {
+          if (code === 0) {
+            resolve();
+            return;
+          }
+
+          reject(new Error(stderr.trim() || `Whisper exited with code ${code}`));
+        });
+      });
+      // A clean exit without a transcript file is reported as an empty transcript.
+      const transcriptPath = path.join(outputDir, `${path.parse(audioFilePath).name}.txt`);
+      if (!fs.existsSync(transcriptPath)) {
+        return '';
+      }
+
+      return fs.readFileSync(transcriptPath, 'utf8').trim();
+    } finally {
+      this._removeTempDir(outputDir);
+    }
+  }
+
+  _createWavBuffer(rawPcmBuffer) {
+    // Prefixes raw PCM bytes with a 44-byte RIFF/WAVE header declaring 16 kHz, mono, 16-bit PCM audio.
+    const SAMPLE_RATE = 16000;
+    const CHANNELS = 1;
+    const BITS_PER_SAMPLE = 16;
+    const bytesPerFrame = CHANNELS * (BITS_PER_SAMPLE / 8);
+    const dataLength = rawPcmBuffer.length;
+
+    const wavHeader = Buffer.alloc(44);
+    wavHeader.write('RIFF', 0);
+    wavHeader.writeUInt32LE(36 + dataLength, 4); // RIFF chunk size
+    wavHeader.write('WAVE', 8);
+    wavHeader.write('fmt ', 12);
+    wavHeader.writeUInt32LE(16, 16); // fmt chunk length
+    wavHeader.writeUInt16LE(1, 20); // audio format 1 = PCM
+    wavHeader.writeUInt16LE(CHANNELS, 22);
+    wavHeader.writeUInt32LE(SAMPLE_RATE, 24);
+    wavHeader.writeUInt32LE(SAMPLE_RATE * bytesPerFrame, 28); // byte rate
+    wavHeader.writeUInt16LE(bytesPerFrame, 32); // block align
+    wavHeader.writeUInt16LE(BITS_PER_SAMPLE, 34);
+    wavHeader.write('data', 36);
+    wavHeader.writeUInt32LE(dataLength, 40);
+    return Buffer.concat([wavHeader, rawPcmBuffer]);
+  }
+
+  _removeTempDir(tempDir) {
+    // Recursively deletes a temp directory; a failed removal is logged instead of thrown.
+    const removalOptions = { recursive: true, force: true };
+    try {
+      fs.rmSync(tempDir, removalOptions);
+    } catch (removeError) {
+      const details = { tempDir, error: removeError.message };
+      logger.error('Failed to remove Whisper temp directory', details);
+    }
   }
 }
 
-module.exports = new SpeechService();
\ No newline at end of file
+module.exports = new SpeechService();
diff --git a/src/styles/common.css b/src/styles/common.css
index 478f715..2db6f85 100644
--- a/src/styles/common.css
+++ b/src/styles/common.css
@@ -1,7 +1,7 @@
 /* Common Styles for OpenCluely UI Components */
 
 /* Font imports */
-@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css');
+@import url('../../node_modules/@fortawesome/fontawesome-free/css/all.min.css');
 
 /* Base styles */
 body {
@@ -576,4 +576,4 @@ body {
 .hide-scrollbar {
     scrollbar-width: none;
     -ms-overflow-style: none;
-}
\ No newline at end of file
+}
diff --git a/src/ui/settings-window.js b/src/ui/settings-window.js
index 1062c3c..2a7047e 100644
--- a/src/ui/settings-window.js
+++ b/src/ui/settings-window.js
@@ -6,8 +6,13 @@ document.addEventListener('DOMContentLoaded', () => {
     // Get DOM elements
     const closeButton = document.getElementById('closeButton');
     const quitButton = document.getElementById('quitButton');
+    const speechProviderSelect = document.getElementById('speechProvider');
     const azureKeyInput = document.getElementById('azureKey');
     const azureRegionInput = document.getElementById('azureRegion');
+    const whisperCommandInput = document.getElementById('whisperCommand');
+    const whisperModelInput = document.getElementById('whisperModel');
+    const whisperLanguageInput = document.getElementById('whisperLanguage');
+    const whisperSegmentMsInput = document.getElementById('whisperSegmentMs');
     const geminiKeyInput = document.getElementById('geminiKey');
     const windowGapInput = document.getElementById('windowGap');
     const codingLanguageSelect = document.getElementById('codingLanguage');
@@ -66,8 +71,13 @@ document.addEventListener('DOMContentLoaded', () => {
 
     // Function to load settings into UI
     const loadSettingsIntoUI = (settings) => {
+        if (settings.speechProvider && speechProviderSelect) speechProviderSelect.value = settings.speechProvider;
         if (settings.azureKey && azureKeyInput) azureKeyInput.value = settings.azureKey;
         if (settings.azureRegion && azureRegionInput) azureRegionInput.value = settings.azureRegion;
+        if (settings.whisperCommand && whisperCommandInput) whisperCommandInput.value = settings.whisperCommand;
+        if (settings.whisperModel && whisperModelInput) whisperModelInput.value = settings.whisperModel;
+        if (settings.whisperLanguage && whisperLanguageInput) whisperLanguageInput.value = settings.whisperLanguage;
+        if (settings.whisperSegmentMs && whisperSegmentMsInput) whisperSegmentMsInput.value = settings.whisperSegmentMs;
         if (settings.geminiKey && geminiKeyInput) geminiKeyInput.value = settings.geminiKey;
         if (settings.windowGap && windowGapInput) windowGapInput.value = settings.windowGap;
         
@@ -90,6 +100,8 @@ document.addEventListener('DOMContentLoaded', () => {
                 }
             });
         }
+
+        updateSpeechFieldStates();
     };
 
     // Load settings when window opens
@@ -115,8 +127,13 @@ document.addEventListener('DOMContentLoaded', () => {
     // Save settings helper function
     const saveSettings = () => {
         const settings = {};
+        if (speechProviderSelect) settings.speechProvider = speechProviderSelect.value;
         if (azureKeyInput) settings.azureKey = azureKeyInput.value;
         if (azureRegionInput) settings.azureRegion = azureRegionInput.value;
+        if (whisperCommandInput) settings.whisperCommand = whisperCommandInput.value;
+        if (whisperModelInput) settings.whisperModel = whisperModelInput.value;
+        if (whisperLanguageInput) settings.whisperLanguage = whisperLanguageInput.value;
+        if (whisperSegmentMsInput) settings.whisperSegmentMs = whisperSegmentMsInput.value;
         if (geminiKeyInput) settings.geminiKey = geminiKeyInput.value;
         if (windowGapInput) settings.windowGap = windowGapInput.value;
         if (codingLanguageSelect) settings.codingLanguage = codingLanguageSelect.value;
@@ -125,10 +142,29 @@ document.addEventListener('DOMContentLoaded', () => {
         window.api.send('save-settings', settings);
     };
 
+    const updateSpeechFieldStates = () => {
+        // Enables only the inputs of the selected speech provider; 'azure' is used when the selector is absent.
+        const selectedProvider = speechProviderSelect ? speechProviderSelect.value : 'azure';
+        const fieldGroups = [
+            { owner: 'azure', fields: [azureKeyInput, azureRegionInput] },
+            { owner: 'whisper', fields: [whisperCommandInput, whisperModelInput, whisperLanguageInput, whisperSegmentMsInput] }
+        ];
+        for (const { owner, fields } of fieldGroups) {
+            for (const field of fields.filter(Boolean)) {
+                field.disabled = selectedProvider !== owner;
+            }
+        }
+    };
+
     // Add event listeners for all inputs
     const inputs = [
+        speechProviderSelect,
         azureKeyInput,
         azureRegionInput,
+        whisperCommandInput,
+        whisperModelInput,
+        whisperLanguageInput,
+        whisperSegmentMsInput,
         geminiKeyInput,
         windowGapInput
     ];
@@ -140,6 +176,13 @@ document.addEventListener('DOMContentLoaded', () => {
         }
     });
 
+    if (speechProviderSelect) { // the provider selector may be absent from the page
+        speechProviderSelect.addEventListener('change', () => {
+            updateSpeechFieldStates(); // re-sync which provider inputs are disabled
+            saveSettings(); // NOTE(review): speechProviderSelect is also listed in `inputs`; if that list attaches a saving 'change' handler this saves twice — confirm
+        });
+    }
+
     // Language selection handler
     if (codingLanguageSelect) {
         codingLanguageSelect.addEventListener('change', (e) => {
@@ -163,6 +206,8 @@ document.addEventListener('DOMContentLoaded', () => {
         });
     }
 
+    updateSpeechFieldStates();
+
     // Initialize icon grid with correct paths
     const initializeIconGrid = () => {
         if (!iconGrid) return;