RubyRyn · RutvikNK · Apr 29, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.github/workflows/deploy-backend.yml b/.github/workflows/deploy-backend.yml
@@ -0,0 +1,100 @@
+name: Deploy Backend to AWS ECR & EKS
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'src/**'
+      - 'Dockerfile'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'k8s/deployment.yaml'
+      - '.github/workflows/deploy-backend.yml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'src/**'
+      - 'Dockerfile'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'k8s/deployment.yaml'
+
+# The job only reads the repository; AWS access comes from repository secrets.
+permissions:
+  contents: read
+
+# Run at most one build/deploy per ref at a time so that two quick pushes to
+# main cannot roll out concurrently and finish in the wrong order.
+concurrency:
+  group: deploy-backend-${{ github.ref }}
+  cancel-in-progress: false
+
+env:
+  AWS_REGION: us-east-1
+  ECR_REPOSITORY: workmate-backend
+  EKS_CLUSTER_NAME: workmate-cluster
+  KUBECTL_NAMESPACE: workmate
+  DEPLOYMENT_NAME: workmate-backend
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    # Pull requests only build the image to verify that it builds. Every step
+    # that needs AWS credentials is gated on `push`, so PR runs neither depend
+    # on the AWS secrets nor touch ECR/EKS.
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - name: Configure AWS Credentials
+        if: github.event_name == 'push'
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        if: github.event_name == 'push'
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Build, tag, and push image to Amazon ECR
+        id: build-image
+        env:
+          # Empty on pull requests because the ECR login step is skipped.
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          IMAGE_TAG: ${{ github.sha }}
+          # Passed through env instead of being expanded inside the script.
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          if [ "$EVENT_NAME" != "push" ]; then
+            # Pull request: build under a local-only tag to verify the Dockerfile.
+            docker build -t "$ECR_REPOSITORY:$IMAGE_TAG" .
+            exit 0
+          fi
+
+          # Push to main: build the image and push it to ECR for the EKS rollout.
+          IMAGE="$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
+          docker build -t "$IMAGE" .
+          docker push "$IMAGE"
+          echo "image=$IMAGE" >> "$GITHUB_OUTPUT"
+
+      - name: Update kubeconfig
+        if: github.event_name == 'push'
+        run: aws eks update-kubeconfig --name "$EKS_CLUSTER_NAME" --region "$AWS_REGION"
+
+      - name: Deploy to Amazon EKS
+        if: github.event_name == 'push'
+        env:
+          # Full image reference produced by the build step above.
+          IMAGE: ${{ steps.build-image.outputs.image }}
+        run: |
+          # Set the new image for the backend deployment
+          kubectl set image "deployment/$DEPLOYMENT_NAME" "$DEPLOYMENT_NAME=$IMAGE" -n "$KUBECTL_NAMESPACE"
+
+          # Wait for the rollout to complete
+          kubectl rollout status "deployment/$DEPLOYMENT_NAME" -n "$KUBECTL_NAMESPACE"
diff --git a/k8s/secret.yaml b/k8s/secret.yaml
@@ -40,7 +40,8 @@ stringData:
   FRONTEND_URL: "https://REPLACE_ME.pages.dev"
 
   # ── AI / LLM ──────────────────────────────────────────────────
-  GEMINI_API_KEY: "REPLACE_ME"
+  GEMINI_LLM_API_KEY: "REPLACE_ME"
+  GEMINI_EMBEDDING_API_KEY: "REPLACE_ME"
   VOYAGE_API_KEY: "REPLACE_ME"
 
   # ── Notion OAuth ──────────────────────────────────────────────

diff --git a/src/backend/llm/gemini_client.py b/src/backend/llm/gemini_client.py
@@ -21,10 +21,11 @@ class GeminiClient:
 
     def __init__(self, model_id: Optional[str] = None):
         import os
-        api_key = os.getenv("GEMINI_KEY") or os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+
+        api_key = os.getenv("GEMINI_LLM_API_KEY")
         if not api_key:
-            raise ValueError("No Gemini API key found. Set GEMINI_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.")
-            
+            raise ValueError("No Gemini API key found. Set GEMINI_LLM_API_KEY.")
+
         self.client = genai.Client(api_key=api_key)
         self.model_id = model_id or DEFAULT_GEMINI_MODEL_ID
 
@@ -130,7 +131,9 @@ def ask_workmate(
             # Friendly message for MVP; later add structured logging + retries
             error_str = str(e)
             if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
-                retry_match = re.search(r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str)
+                retry_match = re.search(
+                    r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str
+                )
                 retry_secs = retry_match.group(1) if retry_match else "unknown"
                 logger.warning(
                     f"⚠️  Gemini rate limit hit (model: {self.model_id}). "
@@ -177,7 +180,9 @@ async def ask_workmate_stream(
         except Exception as e:
             error_str = str(e)
             if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
-                retry_match = re.search(r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str)
+                retry_match = re.search(
+                    r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str
+                )
                 retry_secs = retry_match.group(1) if retry_match else "unknown"
                 logger.warning(
                     f"⚠️  Gemini rate limit hit (model: {self.model_id}). "

diff --git a/src/backend/load/google_embedder.py b/src/backend/load/google_embedder.py
@@ -24,9 +24,11 @@ class GoogleEmbedder(EmbeddingFunction):
     def __init__(self, model_name="gemini-embedding-001"):
         self.model_name = model_name
 
-        api_key = os.getenv("GEMINI_KEY") or os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        api_key = os.getenv("GEMINI_EMBEDDING_API_KEY")
         if not api_key:
-            print("⚠️ WARNING: No Gemini API key found in environment (GEMINI_KEY / GEMINI_API_KEY).")
+            print(
+                "⚠️ WARNING: No Gemini API key found in environment (GEMINI_EMBEDDING_API_KEY)."
+            )
 
         self.client = genai.Client(api_key=api_key)
 
@@ -60,7 +62,9 @@ def __call__(self, input: Documents) -> Embeddings:
                     error_str = str(e)
                     if "429" in error_str and attempt < 2:
                         wait_time = 45 * (attempt + 1)
-                        print(f"⏳ Rate limited on chunk {i+1}/{len(input)}, waiting {wait_time}s...")
+                        print(
+                            f"⏳ Rate limited on chunk {i + 1}/{len(input)}, waiting {wait_time}s..."
+                        )
                         time.sleep(wait_time)
                     else:
                         raise e