Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions .github/workflows/deploy-backend.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Builds the backend Docker image and, on pushes to main, publishes it to
# Amazon ECR and rolls it out to the EKS deployment.
#
# Pull requests only build the image to prove the Dockerfile still works;
# they never touch AWS, so they also succeed for forks and Dependabot, where
# repository secrets are not available.
name: Deploy Backend to AWS ECR & EKS

on:
  push:
    branches:
      - main
    paths:
      - 'src/**'
      - 'Dockerfile'
      - 'pyproject.toml'
      - 'uv.lock'
      - 'k8s/deployment.yaml'
      - '.github/workflows/deploy-backend.yml'
  pull_request:
    branches:
      - main
    paths:
      - 'src/**'
      - 'Dockerfile'
      - 'pyproject.toml'
      - 'uv.lock'
      - 'k8s/deployment.yaml'

# The job only needs to read the repository; AWS access comes from secrets.
permissions:
  contents: read

# Serialize runs per ref so two quick pushes to main cannot race each other
# and leave the older image deployed. In-flight deploys are never cancelled.
concurrency:
  group: deploy-backend-${{ github.ref }}
  cancel-in-progress: false

env:
  AWS_REGION: us-east-1
  ECR_REPOSITORY: workmate-backend
  EKS_CLUSTER_NAME: workmate-cluster
  KUBECTL_NAMESPACE: workmate
  DEPLOYMENT_NAME: workmate-backend

jobs:
  deploy:
    runs-on: ubuntu-latest

    # We only want to actually deploy on push to main, not on PRs.
    # PRs still build the image to verify that it builds.
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4

      # AWS steps are push-only: secrets are empty on fork PRs, and a
      # build-only check has no need for deploy credentials.
      - name: Configure AWS Credentials
        if: github.event_name == 'push'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Login to Amazon ECR
        if: github.event_name == 'push'
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Build, tag, and push image to Amazon ECR
        id: build-image
        env:
          # Empty on pull requests because the login step is skipped there.
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: ${{ github.sha }}
          # Passed through env instead of being interpolated into the script.
          EVENT_NAME: ${{ github.event_name }}
        run: |
          # On push the image is tagged for ECR; on PRs a local-only tag is
          # used because no registry is available.
          if [ "$EVENT_NAME" = "push" ]; then
            IMAGE="$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
          else
            IMAGE="$ECR_REPOSITORY:$IMAGE_TAG"
          fi

          # Build a docker container so that it can be deployed to EKS.
          docker build -t "$IMAGE" .

          # Only push to ECR and deploy if this is a push to main (not a PR)
          if [ "$EVENT_NAME" = "push" ]; then
            docker push "$IMAGE"
            echo "image=$IMAGE" >> "$GITHUB_OUTPUT"
          fi

      - name: Update kubeconfig
        if: github.event_name == 'push'
        run: aws eks update-kubeconfig --name "$EKS_CLUSTER_NAME" --region "$AWS_REGION"

      - name: Deploy to Amazon EKS
        if: github.event_name == 'push'
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          IMAGE_TAG: ${{ github.sha }}
        run: |
          # Set the new image for the backend deployment
          # (the container name is the same as the deployment name).
          kubectl set image "deployment/$DEPLOYMENT_NAME" \
            "$DEPLOYMENT_NAME=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" \
            -n "$KUBECTL_NAMESPACE"

          # Wait for the rollout to complete
          kubectl rollout status "deployment/$DEPLOYMENT_NAME" -n "$KUBECTL_NAMESPACE"
3 changes: 2 additions & 1 deletion k8s/secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ stringData:
FRONTEND_URL: "https://REPLACE_ME.pages.dev"

# ── AI / LLM ──────────────────────────────────────────────────
GEMINI_API_KEY: "REPLACE_ME"
GEMINI_LLM_API_KEY: "REPLACE_ME"
GEMINI_EMBEDDING_API_KEY: "REPLACE_ME"
VOYAGE_API_KEY: "REPLACE_ME"

# ── Notion OAuth ──────────────────────────────────────────────
Expand Down
15 changes: 10 additions & 5 deletions src/backend/llm/gemini_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ class GeminiClient:

def __init__(self, model_id: Optional[str] = None):
    """Create a Gemini client authenticated from the environment.

    Args:
        model_id: Model identifier to use. Falls back to
            ``DEFAULT_GEMINI_MODEL_ID`` when omitted or empty.

    Raises:
        ValueError: If ``GEMINI_LLM_API_KEY`` is unset or empty.
    """
    import os

    # The LLM key is read from its own variable; it is the only name
    # consulted, so the error message below names exactly that variable.
    api_key = os.getenv("GEMINI_LLM_API_KEY")
    if not api_key:
        raise ValueError("No Gemini API key found. Set GEMINI_LLM_API_KEY.")

    self.client = genai.Client(api_key=api_key)
    self.model_id = model_id or DEFAULT_GEMINI_MODEL_ID

Expand Down Expand Up @@ -130,7 +131,9 @@ def ask_workmate(
# Friendly message for MVP; later add structured logging + retries
error_str = str(e)
if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
retry_match = re.search(r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str)
retry_match = re.search(
r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str
)
retry_secs = retry_match.group(1) if retry_match else "unknown"
logger.warning(
f"⚠️ Gemini rate limit hit (model: {self.model_id}). "
Expand Down Expand Up @@ -177,7 +180,9 @@ async def ask_workmate_stream(
except Exception as e:
error_str = str(e)
if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
retry_match = re.search(r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str)
retry_match = re.search(
r"retryDelay['\"]:\s*['\"](\d+)s?['\"]", error_str
)
retry_secs = retry_match.group(1) if retry_match else "unknown"
logger.warning(
f"⚠️ Gemini rate limit hit (model: {self.model_id}). "
Expand Down
10 changes: 7 additions & 3 deletions src/backend/load/google_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ class GoogleEmbedder(EmbeddingFunction):
def __init__(self, model_name="gemini-embedding-001"):
    """Set up the embedding client using the key from the environment.

    Args:
        model_name: Name of the embedding model, stored on the instance.

    A missing ``GEMINI_EMBEDDING_API_KEY`` only produces a printed
    warning; the client is still constructed with whatever value was read.
    """
    self.model_name = model_name

    # Only the embedding-specific variable is consulted, so the warning
    # names exactly that variable.
    api_key = os.getenv("GEMINI_EMBEDDING_API_KEY")
    if not api_key:
        # Deliberately best-effort: warn rather than raise.
        print(
            "⚠️ WARNING: No Gemini API key found in environment (GEMINI_EMBEDDING_API_KEY)."
        )

    self.client = genai.Client(api_key=api_key)

Expand Down Expand Up @@ -60,7 +62,9 @@ def __call__(self, input: Documents) -> Embeddings:
error_str = str(e)
if "429" in error_str and attempt < 2:
wait_time = 45 * (attempt + 1)
print(f"⏳ Rate limited on chunk {i+1}/{len(input)}, waiting {wait_time}s...")
print(
f"⏳ Rate limited on chunk {i + 1}/{len(input)}, waiting {wait_time}s..."
)
time.sleep(wait_time)
else:
raise e
Expand Down
Loading