diff --git a/.github/workflows/generate-from-spec.yml b/.github/workflows/generate-from-spec.yml
new file mode 100644
index 0000000..d093e79
--- /dev/null
+++ b/.github/workflows/generate-from-spec.yml
@@ -0,0 +1,178 @@
+name: Generate SDK from OpenAPI Spec
+
+# Regenerates the Python SDK with Codex whenever openapi.yaml changes, opens a
+# PR with the result, and then notifies the Node SDK repository.
+on:
+  push:
+    paths:
+      - 'openapi.yaml'
+
+  # Manual trigger for testing
+  workflow_dispatch:
+
+jobs:
+  generate:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout Python SDK
+        uses: actions/checkout@v5
+        with:
+          # Full history so the pre-push commit is available for diffing.
+          fetch-depth: 0
+
+      - name: Get branch and diff info
+        id: info
+        env:
+          # Context values go through the environment instead of being
+          # pasted into the script, so they are never parsed as shell code.
+          BEFORE_SHA: ${{ github.event.before }}
+          AFTER_SHA: ${{ github.sha }}
+        run: |
+          # Get the branch name this workflow is running on
+          BRANCH_NAME="${GITHUB_REF#refs/heads/}"
+          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
+
+          # A new branch reports an all-zero "before" SHA and a manual run
+          # reports none; fall back to the parent commit in both cases.
+          # --verify --quiet prints nothing when HEAD has no parent.
+          if [[ -z "$BEFORE_SHA" || "$BEFORE_SHA" == "0000000000000000000000000000000000000000" ]]; then
+            BEFORE_SHA=$(git rev-parse --verify --quiet HEAD~1 || true)
+          fi
+
+          echo "before_sha=$BEFORE_SHA" >> "$GITHUB_OUTPUT"
+          echo "after_sha=$AFTER_SHA" >> "$GITHUB_OUTPUT"
+
+          # Generate diff for the spec file (empty if no usable base commit)
+          if [[ -n "$BEFORE_SHA" ]]; then
+            git diff "$BEFORE_SHA" "$AFTER_SHA" -- openapi.yaml > spec.diff || touch spec.diff
+          else
+            touch spec.diff
+          fi
+
+          echo "Diff size: $(wc -l < spec.diff) lines"
+          echo "Running on branch: $BRANCH_NAME"
+
+      - name: Fetch prompt and build context
+        run: |
+          # Fetch static prompt from agent-toolkit. --fail makes an HTTP error
+          # fail the step instead of saving the error page as the prompt.
+          curl --fail --silent --show-error --location --retry 3 \
+            --output static_prompt.txt \
+            https://raw.githubusercontent.com/video-db/agent-toolkit/main/context/prompts/spec-to-python-sdk.txt
+
+          # Build full prompt with dynamic content
+          cat > codex_prompt.md << 'PROMPT_EOF'
+          ## Git Diff of OpenAPI Spec Changes
+
+          The following diff shows what changed in the API specification:
+
+          ```diff
+          PROMPT_EOF
+
+          cat spec.diff >> codex_prompt.md
+
+          cat >> codex_prompt.md << 'PROMPT_EOF'
+          ```
+
+          ## Current OpenAPI Spec
+
+          If you need to reference the full spec for context, it's available at: openapi.yaml
+
+          ---
+
+          PROMPT_EOF
+
+          # Append static instructions
+          cat static_prompt.txt >> codex_prompt.md
+
+          echo "Prompt built successfully"
+
+      - name: Run Codex
+        uses: openai/codex-action@v1
+        with:
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          model: o4-mini
+          sandbox: workspace-write
+          prompt-file: codex_prompt.md
+
+      - name: Check for changes and create PR
+        id: create_pr
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BASE_BRANCH: ${{ steps.info.outputs.branch_name }}
+        run: |
+          # Configure git
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          # Clean up temporary files - DO NOT commit these. Done before the
+          # change check so the scratch files never count as changes.
+          rm -f spec.diff static_prompt.txt codex_prompt.md
+
+          # Check if there are changes. `git status --porcelain` also lists
+          # untracked files, which `git diff` does not report.
+          if [[ -z "$(git status --porcelain)" ]]; then
+            echo "No changes generated by Codex"
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          echo "has_changes=true" >> "$GITHUB_OUTPUT"
+
+          # Create work branch from the current branch
+          WORK_BRANCH="auto/spec-sync-$(date +%Y%m%d-%H%M%S)"
+          git checkout -b "$WORK_BRANCH"
+          git add -A
+
+          # Commit
+          git commit -m "feat: sync with OpenAPI spec changes
+
+          Source branch: ${BASE_BRANCH}
+
+          Generated by OpenAI Codex"
+
+          # Push
+          git push origin "$WORK_BRANCH"
+
+          echo "work_branch=$WORK_BRANCH" >> "$GITHUB_OUTPUT"
+          echo "base_branch=$BASE_BRANCH" >> "$GITHUB_OUTPUT"
+
+          # Create PR targeting the original branch
+          gh pr create \
+            --base "$BASE_BRANCH" \
+            --head "$WORK_BRANCH" \
+            --title "feat: sync with OpenAPI spec" \
+            --body "## Summary
+
+          Automated SDK update based on OpenAPI spec changes.
+
+          **Base branch**: \`$BASE_BRANCH\`
+
+          ## Review Checklist
+
+          - [ ] Generated code follows SDK conventions
+          - [ ] Method signatures are correct
+          - [ ] No breaking changes introduced
+          - [ ] Tests pass locally
+
+          ---
+          *Generated by [OpenAI Codex](https://github.com/openai/codex)*"
+
+      - name: Trigger Node SDK Generation
+        if: steps.create_pr.outputs.has_changes == 'true'
+        uses: peter-evans/repository-dispatch@v3
+        with:
+          token: ${{ secrets.SDK_SYNC_PAT }}
+          repository: ${{ github.repository_owner }}/videodb-node
+          event-type: python-updated
+          # toJSON() renders each value as a quoted, escaped JSON string.
+          client-payload: |
+            {
+              "source_branch": ${{ toJSON(steps.create_pr.outputs.work_branch) }},
+              "target_branch": ${{ toJSON(steps.create_pr.outputs.base_branch) }},
+              "trigger_type": "spec_change"
+            }
diff --git a/.github/workflows/notify-node-sdk.yml b/.github/workflows/notify-node-sdk.yml
new file mode 100644
index 0000000..e83ba3a
--- /dev/null
+++ b/.github/workflows/notify-node-sdk.yml
@@ -0,0 +1,102 @@
+name: Notify Node SDK on Python Code Changes
+
+# Tells the Node SDK repository which Python sources changed in a push, unless
+# the push also changed openapi.yaml (generate-from-spec.yml handles that case).
+on:
+  push:
+    paths:
+      - 'videodb/*.py'
+      - 'videodb/**/*.py'
+      - '!videodb/__about__.py'
+      - '!videodb/__init__.py'
+
+jobs:
+  notify:
+    runs-on: ubuntu-latest
+    # The job only reads the repository; the dispatch uses its own token.
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+        with:
+          # Full history so the pre-push commit is available for diffing.
+          fetch-depth: 0
+
+      - name: Check if spec also changed
+        id: check_spec
+        env:
+          # Context values go through the environment instead of being
+          # pasted into the script, so they are never parsed as shell code.
+          BEFORE_SHA: ${{ github.event.before }}
+          AFTER_SHA: ${{ github.sha }}
+        run: |
+          # Handle new branch case (all-zero or missing "before" SHA).
+          # --verify --quiet prints nothing when HEAD has no parent.
+          if [[ -z "$BEFORE_SHA" || "$BEFORE_SHA" == "0000000000000000000000000000000000000000" ]]; then
+            BEFORE_SHA=$(git rev-parse --verify --quiet HEAD~1 || true)
+          fi
+
+          # Check if openapi.yaml changed in this push
+          if [[ -n "$BEFORE_SHA" ]]; then
+            SPEC_CHANGED=$(git diff --name-only "$BEFORE_SHA" "$AFTER_SHA" -- openapi.yaml | wc -l)
+          else
+            SPEC_CHANGED=0
+          fi
+
+          if [[ "$SPEC_CHANGED" -gt 0 ]]; then
+            echo "spec_changed=true" >> "$GITHUB_OUTPUT"
+            echo "Spec also changed - skipping (generate-from-spec.yml will handle this)"
+          else
+            echo "spec_changed=false" >> "$GITHUB_OUTPUT"
+            echo "Only Python code changed - will notify Node SDK"
+          fi
+
+      - name: Get branch and changed files
+        if: steps.check_spec.outputs.spec_changed == 'false'
+        id: info
+        env:
+          BEFORE_SHA: ${{ github.event.before }}
+          AFTER_SHA: ${{ github.sha }}
+        run: |
+          # Get the branch name
+          BRANCH_NAME="${GITHUB_REF#refs/heads/}"
+          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
+
+          # Handle new branch case (all-zero or missing "before" SHA)
+          if [[ -z "$BEFORE_SHA" || "$BEFORE_SHA" == "0000000000000000000000000000000000000000" ]]; then
+            BEFORE_SHA=$(git rev-parse --verify --quiet HEAD~1 || true)
+          fi
+
+          # Get changed Python files (comma-separated for JSON)
+          if [[ -n "$BEFORE_SHA" ]]; then
+            FILES=$(git diff --name-only "$BEFORE_SHA" "$AFTER_SHA" -- 'videodb/*.py' 'videodb/**/*.py' | grep -v '__about__\|__init__' | tr '\n' ',' | sed 's/,$//' || true)
+          else
+            FILES=""
+          fi
+
+          echo "changed_files=$FILES" >> "$GITHUB_OUTPUT"
+          echo "before_sha=$BEFORE_SHA" >> "$GITHUB_OUTPUT"
+          echo "after_sha=$AFTER_SHA" >> "$GITHUB_OUTPUT"
+
+          echo "Branch: $BRANCH_NAME"
+          echo "Changed files: $FILES"
+
+      - name: Trigger Node SDK Generation
+        if: steps.check_spec.outputs.spec_changed == 'false' && steps.info.outputs.changed_files != ''
+        uses: peter-evans/repository-dispatch@v3
+        with:
+          token: ${{ secrets.SDK_SYNC_PAT }}
+          repository: ${{ github.repository_owner }}/videodb-node
+          event-type: python-updated
+          # toJSON() renders each value as a quoted, escaped JSON string.
+          client-payload: |
+            {
+              "source_branch": ${{ toJSON(steps.info.outputs.branch_name) }},
+              "target_branch": ${{ toJSON(steps.info.outputs.branch_name) }},
+              "trigger_type": "code_change",
+              "changed_files": ${{ toJSON(steps.info.outputs.changed_files) }},
+              "before_sha": ${{ toJSON(steps.info.outputs.before_sha) }},
+              "after_sha": ${{ toJSON(steps.info.outputs.after_sha) }}
+            }
diff --git a/.gitignore b/.gitignore
index 8ae2cb6..17bfbc6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ venv/
 .vscode/*
 example.ipynb
 example.py
+videodb-recorder
diff --git a/openapi.yaml b/openapi.yaml new file mode 100644 index 0000000..417b87b --- /dev/null +++ 
b/openapi.yaml @@ -0,0 +1,5605 @@ +openapi: 3.0.3 +info: + title: VideoDB Server API + description: | + VideoDB Server API for video, audio, and image processing with AI capabilities. + This API provides comprehensive video management, search, indexing, and AI-powered features. + version: 1.0.0 + contact: + name: VideoDB Support + url: https://videodb.io + license: + name: MIT + url: https://opensource.org/licenses/MIT + +servers: + - url: https://api.videodb.io + description: Production server + - url: https://staging-api.videodb.io + description: Staging server + +security: + - ApiKeyAuth: [] + +components: + securitySchemes: + ApiKeyAuth: + type: apiKey + in: header + name: x-access-token + description: API key for authentication (sk-xxx format) + + schemas: + Error: + type: object + properties: + success: + type: boolean + example: false + message: + type: string + example: "Error message" + error_code: + type: string + example: "ERROR_CODE" + + SuccessResponse: + type: object + properties: + success: + type: boolean + example: true + message: + type: string + example: "Operation successful" + + AsyncResponse: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [processing, done, failed] + example: "processing" + data: + type: object + properties: + id: + type: string + example: "job-123" + output_url: + type: string + example: "https://api.videodb.io/async-response/job-123" + + User: + type: object + properties: + user_id: + type: string + example: "u-12345" + user_name: + type: string + example: "John Doe" + user_email: + type: string + example: "john@example.com" + collections: + type: array + items: + type: string + example: ["default", "c-67890"] + default_collection: + type: string + example: "default" + + Collection: + type: object + properties: + id: + type: string + example: "default" + name: + type: string + example: "My Collection" + description: + type: string + example: "Collection description" + 
is_public: + type: boolean + example: false + owner: + type: string + example: "u-12345" + created_at: + type: string + format: date-time + + Video: + type: object + properties: + id: + type: string + example: "m-12345" + name: + type: string + example: "video.mp4" + description: + type: string + example: "Video description" + collection_id: + type: string + example: "default" + length: + type: number + example: 120.5 + size: + type: number + example: 1048576 + stream_url: + type: string + example: "https://stream.videodb.io/v/12345" + player_url: + type: string + example: "https://console.videodb.io/player/12345" + thumbnail_url: + type: string + example: "https://assets.videodb.io/thumb/12345.jpg" + created_at: + type: string + format: date-time + + Audio: + type: object + properties: + id: + type: string + example: "a-12345" + name: + type: string + example: "audio.mp3" + collection_id: + type: string + example: "default" + length: + type: number + example: 60.0 + size: + type: number + example: 524288 + created_at: + type: string + format: date-time + + Image: + type: object + properties: + id: + type: string + example: "img-12345" + name: + type: string + example: "image.jpg" + collection_id: + type: string + example: "default" + width: + type: number + example: 1920 + height: + type: number + example: 1080 + size: + type: number + example: 262144 + url: + type: string + example: "https://assets.videodb.io/img/12345.jpg" + created_at: + type: string + format: date-time + + SearchResult: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + query: + type: string + example: "search query" + results: + type: array + items: + type: object + properties: + video_id: + type: string + example: "m-12345" + start: + type: number + example: 10.5 + end: + type: number + example: 20.3 + text: + type: string + example: "matched content" + score: + type: number + example: 0.95 + + Timeline: + type: object + properties: 
+ video_id: + type: string + example: "m-12345" + clips: + type: array + items: + type: object + properties: + start: + type: number + example: 0 + end: + type: number + example: 30 + volume: + type: number + example: 1.0 + + BillingUsage: + type: object + properties: + credit_balance: + type: number + example: 100.50 + usage_this_month: + type: number + example: 25.75 + breakdown: + type: object + additionalProperties: + type: number + + RTStream: + type: object + properties: + id: + type: string + example: "rts-12345" + name: + type: string + example: "My Stream" + status: + type: string + enum: [connected, stopped] + example: "connected" + sample_rate: + type: integer + example: 30 + media_types: + type: array + items: + type: string + enum: [video, audio] + example: ["video", "audio"] + collection_id: + type: string + example: "default" + store: + type: boolean + example: false + created_at: + type: string + format: date-time + + CaptureSession: + type: object + properties: + session_id: + type: string + example: "capture-12345" + end_user_id: + type: string + example: "user-123" + status: + type: string + enum: [created, starting, active, stopped, failed] + example: "created" + collection_id: + type: string + example: "default" + ws_connection_id: + type: string + example: "conn-123" + metadata: + type: object + created_at: + type: string + format: date-time + +paths: + /: + get: + summary: Get service information + description: Returns basic service information + responses: + '200': + description: Service information + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + service: + type: string + example: "VideoDB Server" + + /user: + get: + summary: Get user information + security: + - ApiKeyAuth: [] + responses: + '200': + description: User information + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: 
true + data: + $ref: '#/components/schemas/User' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /user/api_key: + get: + summary: Get user API keys + security: + - ApiKeyAuth: [] + responses: + '200': + description: List of API keys + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + key: + type: string + example: "sk-xxx" + created_at: + type: string + format: date-time + + post: + summary: Create new API key + security: + - ApiKeyAuth: [] + responses: + '200': + description: API key created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + api_key: + type: string + example: "sk-xxx" + + /user/api_key/{api_key}: + delete: + summary: Delete API key + security: + - ApiKeyAuth: [] + parameters: + - name: api_key + in: path + required: true + schema: + type: string + example: "sk-xxx" + responses: + '200': + description: API key deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /collection: + get: + summary: Get user collections + security: + - ApiKeyAuth: [] + responses: + '200': + description: List of collections + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + collections: + type: array + items: + $ref: '#/components/schemas/Collection' + default_collection: + type: string + example: "default" + + post: + summary: Create new collection + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "My New Collection" + description: + type: string + example: "Collection for my videos" + is_public: + type: boolean + example: 
false + responses: + '200': + description: Collection created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Collection' + + /collection/{collection_id}: + get: + summary: Get collection details + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + responses: + '200': + description: Collection details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Collection' + + patch: + summary: Update collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Updated Collection Name" + description: + type: string + example: "Updated description" + is_public: + type: boolean + example: true + responses: + '200': + description: Collection updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Collection' + + delete: + summary: Delete collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + responses: + '200': + description: Collection deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /collection/{collection_id}/upload: + post: + summary: Upload media to collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - url + 
properties: + url: + type: string + example: "https://example.com/video.mp4" + name: + type: string + example: "My Video" + media_type: + type: string + enum: [video, audio, image] + example: "video" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Upload initiated + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Video' + + /collection/{collection_id}/search/: + post: + summary: Search within collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - query + properties: + query: + type: string + example: "search query" + index_type: + type: string + enum: [spoken_word, scene] + example: "spoken_word" + search_type: + type: string + enum: [semantic, custom] + example: "semantic" + score_threshold: + type: number + example: 0.2 + result_threshold: + type: integer + example: 10 + stitch: + type: boolean + example: true + rerank: + type: boolean + example: false + filter: + type: array + items: + type: object + responses: + '200': + description: Search results + content: + application/json: + schema: + $ref: '#/components/schemas/SearchResult' + + /video/: + get: + summary: List videos + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + - name: page_index + in: query + schema: + type: integer + example: 0 + - name: count + in: query + schema: + type: integer + maximum: 5000 + example: 50 + responses: + '200': + description: List of videos + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + 
properties: + videos: + type: array + items: + $ref: '#/components/schemas/Video' + + /video/{video_id}: + get: + summary: Get video details + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: Video details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Video' + + patch: + summary: Update video + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Updated Video Name" + responses: + '200': + description: Video updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Video' + + delete: + summary: Delete video + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: Video deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /video/{video_id}/storage/: + delete: + summary: Delete video storage + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: Video storage deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /video/{video_id}/stream/: + post: + summary: Create video stream + security: + - ApiKeyAuth: [] + parameters: + - name: 
video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [mp4, webm, hls] + example: "mp4" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Stream created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/v/12345" + + /video/{video_id}/thumbnail/: + get: + summary: Get video thumbnail + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: timestamp + in: query + schema: + type: number + example: 10.5 + responses: + '200': + description: Thumbnail URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + thumbnail_url: + type: string + example: "https://assets.videodb.io/thumb/12345.jpg" + + post: + summary: Generate custom thumbnail + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + timestamp: + type: number + example: 10.5 + width: + type: integer + example: 320 + height: + type: integer + example: 180 + responses: + '200': + description: Thumbnail generated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/thumbnails/: + get: + summary: Get all video thumbnails + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + 
example: "m-12345" + responses: + '200': + description: List of thumbnails + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + timestamp: + type: number + example: 10.5 + url: + type: string + example: "https://assets.videodb.io/thumb/12345_10.jpg" + + /video/{video_id}/transcription/: + get: + summary: Get video transcription + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: engine + in: query + schema: + type: string + default: "default" + example: "default" + - name: start + in: query + schema: + type: number + default: 0 + example: 10.5 + - name: end + in: query + schema: + type: number + default: -1 + example: 60.0 + - name: segmenter + in: query + schema: + type: string + default: "word" + example: "word" + - name: length + in: query + schema: + type: integer + default: 1 + example: 1 + responses: + '200': + description: Transcription data + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [completed, processing, failed] + example: "completed" + data: + type: object + properties: + transcript: + type: array + items: + type: object + properties: + text: + type: string + example: "Hello world" + start: + type: number + example: 1.5 + end: + type: number + example: 3.2 + + post: + summary: Generate video transcription + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + engine: + type: string + default: "default" + example: "default" + force: + type: boolean + example: false + language_code: + type: string + example: "en-US" + 
callback_url: + type: string + example: "https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Transcription job started + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + message: + type: string + example: "transcription already exists" + + /video/{video_id}/index/: + get: + summary: Get video index status + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: index_type + in: query + required: true + schema: + type: string + enum: [spoken_word, scene] + example: "spoken_word" + - name: engine + in: query + schema: + type: string + default: "default" + example: "default" + responses: + '200': + description: Index status + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [done, processing, failed] + example: "done" + message: + type: string + example: "Index is available" + + post: + summary: Create video index + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + index_type: + type: string + enum: [spoken_word, scene] + default: "spoken_word" + example: "spoken_word" + engine: + type: string + default: "default" + example: "default" + force: + type: boolean + example: false + language_code: + type: string + example: "en-US" + segmentation_type: + type: string + default: "sentence" + example: "sentence" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Index job started + content: + application/json: + schema: + $ref: 
'#/components/schemas/AsyncResponse' + + /video/{video_id}/search/: + post: + summary: Search within video + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - query + properties: + query: + type: string + example: "search query" + index_type: + type: string + enum: [spoken_word, scene] + example: "spoken_word" + search_type: + type: string + enum: [semantic, keyword] + example: "semantic" + score_threshold: + type: number + example: 0.2 + result_threshold: + type: integer + example: 10 + stitch: + type: boolean + example: true + scene_index_id: + type: string + example: "idx-12345" + filter: + type: array + items: + type: object + responses: + '200': + description: Search results + content: + application/json: + schema: + $ref: '#/components/schemas/SearchResult' + + /video/{video_id}/scenes/: + get: + summary: Get video scenes + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: List of scenes + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + scene_id: + type: string + example: "scene-123" + start_time: + type: number + example: 10.5 + end_time: + type: number + example: 25.3 + description: + type: string + example: "Scene description" + thumbnail_url: + type: string + example: "https://assets.videodb.io/scene/123.jpg" + + post: + summary: Create video scenes + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + 
properties: + scene_type: + type: string + enum: [shot, time_based] + example: "shot" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Scene creation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/scene/{scene_id}/describe/: + post: + summary: Describe video scene + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_id + in: path + required: true + schema: + type: string + example: "scene-123" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + prompt: + type: string + example: "Describe what happens in this scene" + model_name: + type: string + example: "gpt-4" + responses: + '200': + description: Scene description generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + description: + type: string + example: "Scene description text" + + /video/{video_id}/frame/{frame_id}/describe/: + post: + summary: Describe video frame + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: frame_id + in: path + required: true + schema: + type: string + example: "frame-123" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + prompt: + type: string + example: "Describe this frame" + model_name: + type: string + example: "gpt-4-vision" + responses: + '200': + description: Frame description generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + description: + type: string + example: "Frame description text" + + 
/video/{video_id}/clip: + post: + summary: Generate video clip + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "Create a clip about the introduction" + content_type: + type: string + default: "spoken" + example: "spoken" + model_name: + type: string + default: "basic" + example: "basic" + scene_index_id: + type: string + example: "idx-12345" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Clip generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/workflow/: + post: + summary: Execute video workflow + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - workflow_type + properties: + workflow_type: + type: string + enum: [transcribe, index, analyze] + example: "transcribe" + config: + type: object + properties: + language: + type: string + example: "en" + model: + type: string + example: "gpt-4" + responses: + '200': + description: Workflow started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /audio/: + get: + summary: List audios + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: List of audios + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + audios: + type: array + items: + $ref: 
'#/components/schemas/Audio' + + /audio/{audio_id}: + get: + summary: Get audio details + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: Audio details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Audio' + + patch: + summary: Update audio + tags: + - Audio + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Updated Audio Name" + responses: + '200': + description: Audio updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Audio' + + delete: + summary: Delete audio + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + responses: + '200': + description: Audio deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /audio/{audio_id}/generate_url: + post: + summary: Generate audio stream URL + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [mp3, wav, flac] + example: "mp3" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Stream URL generated + content: + 
application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/a/12345" + + /image/: + get: + summary: List images + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: List of images + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + images: + type: array + items: + $ref: '#/components/schemas/Image' + + /image/{image_id}: + get: + summary: Get image details + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: Image details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Image' + + patch: + summary: Update image + tags: + - Images + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Updated Image Name" + responses: + '200': + description: Image updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Image' + + delete: + summary: Delete image + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + responses: + '200': + description: Image deleted + content: 
+ application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /image/{image_id}/generate_url: + post: + summary: Generate image URL + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [jpg, png, webp] + example: "jpg" + quality: + type: integer + minimum: 1 + maximum: 100 + example: 90 + width: + type: integer + example: 1024 + height: + type: integer + example: 768 + responses: + '200': + description: Image URL generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + image_url: + type: string + example: "https://assets.videodb.io/img/12345.jpg" + + /collection/{collection_id}/generate/image/: + post: + summary: Generate image using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "A beautiful sunset over mountains" + aspect_ratio: + type: string + example: "16:9" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Image generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/generate/video/: + post: + summary: Generate video using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + 
type: string + example: "A cat playing with a ball" + duration: + type: number + example: 5 + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Video generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/generate/audio/: + post: + summary: Generate audio using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + - audio_type + properties: + prompt: + type: string + example: "Generate upbeat background music" + audio_type: + type: string + enum: [speech, sound_effect, music] + example: "music" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Audio generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/generate/text/: + post: + summary: Generate text using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "Summarize the content of this video" + video_id: + type: string + example: "m-12345" + model_name: + type: string + example: "gpt-4" + max_tokens: + type: integer + example: 500 + temperature: + type: number + example: 0.7 + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Text generation started or completed + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + 
type: boolean + example: true + data: + type: object + properties: + output: + type: string + example: "Generated text content" + + /timeline: + post: + summary: Compile timeline + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - request_type + - timeline + properties: + request_type: + type: string + enum: [compile] + example: "compile" + timeline: + type: array + items: + $ref: '#/components/schemas/Timeline' + responses: + '200': + description: Timeline compilation result + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/compiled/12345" + + /billing/usage: + get: + summary: Get billing usage information + security: + - ApiKeyAuth: [] + responses: + '200': + description: Billing usage data + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/BillingUsage' + + /billing/checkout: + post: + summary: Create billing checkout session + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + mode: + type: string + enum: [payment, subscription] + example: "payment" + plan_id: + type: string + example: "plan-basic" + amount: + type: number + example: 100 + responses: + '200': + description: Checkout URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + url: + type: string + example: "https://checkout.stripe.com/pay/xxx" + + /billing/checkouts: + get: + summary: Get billing checkout history + security: + - ApiKeyAuth: [] + parameters: + - name: limit + in: query + schema: + type: integer + example: 10 + - name: offset + in: query + schema: + type: integer 
+ example: 0 + responses: + '200': + description: Checkout history + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + id: + type: string + example: "cs_test_xxx" + amount: + type: number + example: 100 + currency: + type: string + example: "usd" + status: + type: string + example: "completed" + created_at: + type: string + format: date-time + + /billing/invoices: + get: + summary: Get billing invoices + security: + - ApiKeyAuth: [] + parameters: + - name: limit + in: query + schema: + type: integer + example: 10 + - name: offset + in: query + schema: + type: integer + example: 0 + responses: + '200': + description: Invoice list + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + id: + type: string + example: "in_xxx" + amount: + type: number + example: 100 + currency: + type: string + example: "usd" + status: + type: string + example: "paid" + pdf_url: + type: string + example: "https://invoice.stripe.com/pdf/xxx" + created_at: + type: string + format: date-time + + /billing/topup: + post: + summary: Create topup payment + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - amount + properties: + amount: + type: number + example: 50 + currency: + type: string + example: "usd" + responses: + '200': + description: Topup checkout URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + url: + type: string + example: "https://checkout.stripe.com/pay/xxx" + + /billing/auto_recharge: + get: + summary: Get auto recharge settings + security: + - ApiKeyAuth: [] + responses: + '200': + description: Auto recharge settings + content: + application/json: + 
schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + enabled: + type: boolean + example: true + threshold: + type: number + example: 10 + amount: + type: number + example: 50 + + post: + summary: Update auto recharge settings + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + enabled: + type: boolean + example: true + threshold: + type: number + example: 10 + amount: + type: number + example: 50 + responses: + '200': + description: Auto recharge updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + message: + type: string + example: "Auto recharge settings updated" + + /async-response/{response_id}: + get: + summary: Get async operation result + parameters: + - name: response_id + in: path + required: true + schema: + type: string + example: "job-12345" + responses: + '200': + description: Operation result + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [processing, done, failed] + example: "done" + data: + type: object + description: "Result data varies by operation type" + '404': + description: Response not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /download: + get: + summary: List download entries + security: + - ApiKeyAuth: [] + parameters: + - name: page_index + in: query + schema: + type: integer + example: 0 + - name: count + in: query + schema: + type: integer + maximum: 5000 + example: 50 + responses: + '200': + description: List of downloads + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + downloads: + type: array + items: + type: object + properties: + id: + type: string + example: 
"download-12345" + name: + type: string + example: "video_download.mp4" + status: + type: string + enum: [processing, done, error] + example: "done" + created_at: + type: string + format: date-time + download_url: + type: string + example: "https://example.com/download/video.mp4" + + post: + summary: Create download request + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - stream_link + properties: + stream_link: + type: string + example: "https://stream.videodb.io/v/12345" + name: + type: string + example: "my_download.mp4" + responses: + '200': + description: Download initiated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + delete: + summary: Delete download entry + security: + - ApiKeyAuth: [] + parameters: + - name: download_id + in: query + required: true + schema: + type: string + example: "download-12345" + responses: + '200': + description: Download deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /download/{download_id}: + get: + summary: Get download status/details + security: + - ApiKeyAuth: [] + parameters: + - name: download_id + in: path + required: true + schema: + type: string + example: "download-12345" + responses: + '200': + description: Download status + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + id: + type: string + example: "download-12345" + name: + type: string + example: "video_download.mp4" + status: + type: string + enum: [processing, done, error] + example: "done" + download_url: + type: string + example: "https://example.com/download/video.mp4" + created_at: + type: string + format: date-time + + post: + summary: Retry download + security: + - ApiKeyAuth: [] + parameters: + - name: download_id + in: 
path + required: true + schema: + type: string + example: "download-12345" + responses: + '200': + description: Download retry initiated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /chat/completions: + post: + summary: OpenAI-compatible chat completions proxy + description: Proxy endpoint for OpenAI chat completions API with VideoDB billing + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - model + - messages + properties: + model: + type: string + enum: [gpt-4o-2024-11-20] + example: "gpt-4o-2024-11-20" + messages: + type: array + items: + type: object + properties: + role: + type: string + enum: [system, user, assistant] + example: "user" + content: + type: string + example: "Hello, how are you?" + max_tokens: + type: integer + example: 100 + temperature: + type: number + example: 0.7 + stream: + type: boolean + example: false + responses: + '200': + description: Chat completion response + content: + application/json: + schema: + type: object + properties: + id: + type: string + example: "chatcmpl-123" + object: + type: string + example: "chat.completion" + created: + type: integer + example: 1677652288 + model: + type: string + example: "gpt-4o-2024-11-20" + choices: + type: array + items: + type: object + properties: + index: + type: integer + example: 0 + message: + type: object + properties: + role: + type: string + example: "assistant" + content: + type: string + example: "Hello! I'm doing well, thank you for asking." 
+ finish_reason: + type: string + example: "stop" + usage: + type: object + properties: + prompt_tokens: + type: integer + example: 10 + completion_tokens: + type: integer + example: 15 + total_tokens: + type: integer + example: 25 + + /timeline_v2: + post: + summary: Compile timeline (v2) + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - request_type + - timeline + properties: + request_type: + type: string + enum: [compile] + example: "compile" + timeline: + type: array + items: + $ref: '#/components/schemas/Timeline' + output_format: + type: string + enum: [mp4, webm, hls] + example: "mp4" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Timeline compilation result + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/compiled/12345" + duration: + type: number + example: 120.5 + format: + type: string + example: "mp4" + + /timeline_v2/download: + post: + summary: Download compiled timeline + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - timeline_id + properties: + timeline_id: + type: string + example: "timeline-12345" + format: + type: string + enum: [mp4, webm, avi] + example: "mp4" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Download initiated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /compile/: + post: + summary: Compile media content + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - inputs + properties: + inputs: + type: array + items: + type: object + properties: + media_id: 
+ type: string + example: "m-12345" + start_time: + type: number + example: 10.0 + end_time: + type: number + example: 30.0 + output_format: + type: string + enum: [mp4, webm, hls] + example: "mp4" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Compilation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/index/scene/: + get: + summary: Get video scene index status + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: Scene index status + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [done, processing, failed] + example: "done" + data: + type: object + properties: + scene_count: + type: integer + example: 25 + total_duration: + type: number + example: 120.5 + last_updated: + type: string + format: date-time + + post: + summary: Create video scene index + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + scene_type: + type: string + enum: [shot, time_based] + example: "shot" + segmentation_threshold: + type: number + example: 0.8 + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Scene index creation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/index/scene/{scene_index_id}: + get: + summary: Get scene index details + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: 
"m-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + responses: + '200': + description: Scene index details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + id: + type: string + example: "scene-idx-12345" + video_id: + type: string + example: "m-12345" + scene_type: + type: string + example: "shot" + status: + type: string + enum: [done, processing, failed] + example: "done" + scenes: + type: array + items: + type: object + properties: + start_time: + type: number + example: 10.5 + end_time: + type: number + example: 25.3 + confidence: + type: number + example: 0.85 + + delete: + summary: Delete scene index + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + responses: + '200': + description: Scene index deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /audio/{audio_id}/transcription/: + get: + summary: Get audio transcription + tags: + - Audio + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + - name: engine + in: query + schema: + type: string + example: "default" + - name: start + in: query + schema: + type: number + default: 0 + example: 0 + - name: end + in: query + schema: + type: number + default: -1 + example: 60.0 + responses: + '200': + description: Audio transcription data + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [completed, processing, failed] + example: "completed" + data: + type: object + properties: + transcript: + 
type: array + items: + type: object + properties: + text: + type: string + example: "Hello world" + start: + type: number + example: 1.5 + end: + type: number + example: 3.2 + '404': + description: Transcription not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + post: + summary: Generate audio transcription + tags: + - Audio + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + engine: + type: string + default: "default" + example: "default" + language_code: + type: string + default: "en" + example: "en" + force: + type: boolean + default: false + example: false + callback_url: + type: string + example: "https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Transcription job started + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + message: + type: string + example: "transcription already exists" + + /collection/{collection_id}/upload_url: + get: + summary: Get presigned upload URL + tags: + - Collections + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + - name: name + in: query + schema: + type: string + example: "my_video.mp4" + responses: + '200': + description: Upload URL generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + upload_url: + type: string + example: "https://s3.amazonaws.com/..." 
+ video_id: + type: string + example: "m-12345" + + /collection/{collection_id}/websocket: + get: + summary: Get WebSocket connection URL + tags: + - Collections + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + responses: + '200': + description: WebSocket URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + websocket_url: + type: string + example: "wss://ws.videodb.io/..." + + /collection/{collection_id}/search/title/: + post: + summary: Search by title within collection + tags: + - Search + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - query + properties: + query: + type: string + example: "search query" + search_type: + type: string + default: "llm" + example: "llm" + responses: + '200': + description: Title search results + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + + /collection/{collection_id}/search/web/: + post: + summary: Web search within collection + tags: + - Search + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - query + properties: + query: + type: string + example: "search query" + responses: + '200': + description: Web search results + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + + /collection/{collection_id}/generate/video/dub: + post: + summary: Dub video with 
AI-generated audio + tags: + - AI Generation + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + video_id: + type: string + example: "m-12345" + target_language: + type: string + example: "es" + callback_url: + type: string + example: "https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Dubbing job started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/video/{video_id}/translate: + post: + summary: Translate video content + tags: + - AI Generation + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + target_language: + type: string + example: "es" + callback_url: + type: string + example: "https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Translation job started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/scenes/{scene_collection_id}/: + get: + summary: Get scene collection details + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_collection_id + in: path + required: true + schema: + type: string + example: "sc-12345" + - name: offset + in: query + schema: + type: integer + example: 0 + - name: limit + in: query + schema: + type: integer + example: 100 + responses: + '200': + description: Scene 
collection details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + id: + type: string + example: "sc-12345" + scenes: + type: array + items: + type: object + properties: + scene_id: + type: string + start: + type: number + end: + type: number + description: + type: string + + patch: + summary: Update scene collection + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_collection_id + in: path + required: true + schema: + type: string + example: "sc-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - scenes + properties: + scenes: + type: array + items: + type: object + responses: + '200': + description: Scene collection updated + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + delete: + summary: Delete scene collection + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_collection_id + in: path + required: true + schema: + type: string + example: "sc-12345" + responses: + '200': + description: Scene collection deleted + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/index/delete: + post: + summary: Delete video index + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + index_type: + type: string + enum: [spoken_word, scene, all] + default: "all" + example: "all" + model_name: + type: string + default: "gpt4-v" + 
example: "gpt4-v" + responses: + '200': + description: Index deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /video/{video_id}/reframe: + post: + summary: Reframe video to different aspect ratio + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - target + - mode + properties: + target: + type: string + example: "9:16" + mode: + type: string + example: "auto" + start: + type: number + default: 0 + example: 0 + end: + type: number + example: 30 + callback_url: + type: string + example: "https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Reframe job started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/reframe/{reframe_id}: + get: + summary: Get reframe job status + tags: + - Videos + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: reframe_id + in: path + required: true + schema: + type: string + example: "reframe-12345" + responses: + '200': + description: Reframe job status + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [processing, done, failed] + example: "done" + data: + type: object + '404': + description: Reframe job not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /assets: + get: + summary: List all assets across collections + tags: + - Assets + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + - name: asset_type + in: query + description: 
Comma-separated asset types + schema: + type: string + example: "video,audio,image" + - name: sort_by + in: query + schema: + type: string + enum: [name, duration, size, created_at] + default: "created_at" + example: "created_at" + - name: sort_order + in: query + schema: + type: string + enum: [asc, desc] + default: "desc" + example: "desc" + - name: min_duration + in: query + schema: + type: number + example: 10 + - name: max_duration + in: query + schema: + type: number + example: 300 + - name: min_size + in: query + schema: + type: number + example: 1024 + - name: max_size + in: query + schema: + type: number + example: 104857600 + - name: name_pattern + in: query + description: Regex pattern for name filter + schema: + type: string + example: ".*intro.*" + - name: page + in: query + schema: + type: integer + default: 1 + example: 1 + - name: page_size + in: query + schema: + type: integer + default: 50000 + maximum: 50000 + example: 100 + responses: + '200': + description: List of assets + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + + /user/api-key-collections: + put: + summary: Set API key collection scoping + tags: + - Authentication + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - api_key + - collection_ids + properties: + api_key: + type: string + example: "sk-xxx" + collection_ids: + type: array + items: + type: string + example: ["default", "c-12345"] + responses: + '200': + description: API key collections updated + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + '400': + description: Invalid request + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '403': + description: Pro-only feature + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /editor: + post: + 
summary: Compile editor timeline + tags: + - Editor + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + description: Timeline compilation payload with tracks and clips + responses: + '200': + description: Timeline compilation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /editor/download: + post: + summary: Download compiled editor timeline + tags: + - Editor + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - stream_url + properties: + stream_url: + type: string + example: "https://stream.videodb.io/compiled/12345" + responses: + '200': + description: Download URL generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + download_url: + type: string + example: "https://download.videodb.io/..." 
+ + /transcode: + post: + summary: Start transcode job + tags: + - Transcode + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - source + properties: + source: + type: string + description: Source video ID + example: "m-12345" + responses: + '200': + description: Transcode job started + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + job_id: + type: string + example: "job-12345" + '400': + description: Invalid request + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /transcode/{job_id}: + get: + summary: Get transcode job status + tags: + - Transcode + security: + - ApiKeyAuth: [] + parameters: + - name: job_id + in: path + required: true + schema: + type: string + example: "job-12345" + responses: + '200': + description: Transcode job details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + + /collection/{collection_id}/meeting/record: + post: + summary: Record a meeting + tags: + - Meeting + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - meeting_url + properties: + meeting_url: + type: string + example: "https://meet.google.com/abc-def-ghi" + bot_name: + type: string + default: "VideoDB Assistant" + example: "VideoDB Assistant" + meeting_title: + type: string + example: "Weekly standup" + time_zone: + type: string + default: "UTC" + example: "UTC" + bot_image_url: + type: string + example: "https://example.com/bot-avatar.png" + realtime_stream: + type: boolean + default: false + example: false + callback_url: + type: string + example: 
"https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Meeting recording started + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + bot_id: + type: string + example: "bot-12345" + '400': + description: Invalid request + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /collection/{collection_id}/meeting/{bot_id}: + get: + summary: Get meeting recording information + tags: + - Meeting + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + - name: bot_id + in: path + required: true + schema: + type: string + example: "bot-12345" + responses: + '200': + description: Meeting information + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + bot_id: + type: string + example: "bot-12345" + status: + type: string + example: "recording" + video_url: + type: string + example: "https://stream.videodb.io/v/12345" + '400': + description: Invalid meeting ID + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /video/{video_id}/meeting: + get: + summary: Get meeting by video + tags: + - Meeting + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: Meeting information for video + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + + /collection/{collection_id}/capture/session: + post: + summary: Create capture session + tags: + - Capture + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: 
"default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - end_user_id + properties: + end_user_id: + type: string + example: "user-123" + callback_url: + type: string + example: "https://webhook.example.com/callback" + ws_connection_id: + type: string + example: "conn-123" + metadata: + type: object + responses: + '200': + description: Capture session created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/CaptureSession' + '400': + description: Validation error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + get: + summary: List capture sessions + tags: + - Capture + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + - name: status + in: query + schema: + type: string + enum: [created, starting, active, stopped, failed] + example: "active" + responses: + '200': + description: List of capture sessions + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + sessions: + type: array + items: + $ref: '#/components/schemas/CaptureSession' + next_page: + type: string + nullable: true + + /collection/{collection_id}/capture/session/{session_id}: + get: + summary: Get capture session details + tags: + - Capture + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + - name: session_id + in: path + required: true + schema: + type: string + example: "capture-12345" + responses: + '200': + description: Capture session details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/CaptureSession' + '404': + description: 
Session not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /capture/session/token: + post: + summary: Create capture session token + tags: + - Capture + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + user_id: + type: string + description: End user identifier for partner tracking + example: "user-123" + expires_in: + type: integer + description: Token validity in seconds + default: 86400 + example: 86400 + responses: + '200': + description: Session token created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + token: + type: string + example: "st-xxx" + expires_at: + type: number + example: 1700000000 + expires_in: + type: integer + example: 86400 + + /capture/session/start: + post: + summary: Start capture session + tags: + - Capture + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - session_id + - channels + properties: + session_id: + type: string + example: "capture-12345" + channels: + type: array + items: + type: object + required: + - channel_id + properties: + channel_id: + type: string + example: "mic" + channel_name: + type: string + example: "Microphone" + type: + type: string + enum: [audio, video] + default: "audio" + example: "audio" + store: + type: boolean + default: true + example: true + ws_connection_id: + type: string + example: "conn-123" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Capture session started + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + session_id: + type: string + example: "capture-12345" + status: + type: string + example: "starting" + '400': + 
description: Validation error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '402': + description: Insufficient credits + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /capture/session/{streaming_session_id}: + get: + summary: Get capture session with RTSP URLs + tags: + - Capture + security: + - ApiKeyAuth: [] + parameters: + - name: streaming_session_id + in: path + required: true + schema: + type: string + example: "capture-12345" + responses: + '200': + description: Session details with RTSP URLs + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + session_id: + type: string + example: "capture-12345" + status: + type: string + example: "active" + channels: + type: array + items: + type: object + properties: + channel_id: + type: string + rtsp_url: + type: string + '404': + description: Session not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /capture/session/{streaming_session_id}/stop: + post: + summary: Stop capture session + tags: + - Capture + security: + - ApiKeyAuth: [] + parameters: + - name: streaming_session_id + in: path + required: true + schema: + type: string + example: "capture-12345" + responses: + '200': + description: Session stopped + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /rtstream/: + get: + summary: List RTStreams + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: limit + in: query + schema: + type: integer + default: 10 + example: 10 + - name: offset + in: query + schema: + type: integer + default: 0 + example: 0 + - name: status + in: query + schema: + type: string + enum: [connected, stopped] + example: "connected" + - name: name + in: query + description: Filter by name substring + schema: + type: string + example: "my stream" + - name: ordering + in: 
query + description: Sort field (prefix with - for descending) + schema: + type: string + example: "-created_at" + responses: + '200': + description: List of RTStreams + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + results: + type: array + items: + $ref: '#/components/schemas/RTStream' + count: + type: integer + example: 25 + next: + type: string + nullable: true + example: "/rtstream/?limit=10&offset=10" + previous: + type: string + nullable: true + example: null + + post: + summary: Create RTStream + tags: + - RTStream + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - url + properties: + url: + type: string + description: RTSP or YouTube stream URL + example: "rtsp://example.com:8554/stream" + name: + type: string + example: "My Stream" + collection_id: + type: string + default: "default" + example: "default" + sample_rate: + type: integer + default: 30 + example: 30 + media_types: + type: array + items: + type: string + enum: [video, audio] + example: ["video", "audio"] + store: + type: boolean + default: false + example: false + enable_transcript: + type: boolean + default: true + example: true + ws_connection_id: + type: string + example: "conn-123" + responses: + '201': + description: RTStream created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/RTStream' + '400': + description: Validation error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + '402': + description: Insufficient credits + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /rtstream/{stream_id}/: + get: + summary: Get RTStream details + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: 
true + schema: + type: string + example: "rts-12345" + responses: + '200': + description: RTStream details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/RTStream' + '404': + description: Stream not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + patch: + summary: Update RTStream + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Updated Stream Name" + sample_rate: + type: integer + example: 15 + responses: + '200': + description: RTStream updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/RTStream' + '400': + description: Invalid fields + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /rtstream/{stream_id}/status/: + patch: + summary: Start or stop RTStream + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - action + properties: + action: + type: string + enum: [start, stop] + example: "stop" + responses: + '200': + description: RTStream status updated + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /rtstream/{stream_id}/export: + post: + summary: Export RTStream recording as VideoDB asset + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + requestBody: + required: true + 
content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Exported Recording" + responses: + '200': + description: Recording exported + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + video_id: + type: string + example: "m-12345" + name: + type: string + example: "Exported Recording" + stream_url: + type: string + example: "https://stream.videodb.io/v/12345" + player_url: + type: string + example: "https://console.videodb.io/player/12345" + duration: + type: number + example: 123.45 + '400': + description: No recordings available + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /rtstream/{stream_id}/index/scene: + post: + summary: Create RTStream scene index + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + extraction_type: + type: string + enum: [time, transcript] + default: "time" + example: "time" + extraction_config: + type: object + properties: + time: + type: integer + example: 10 + frame_count: + type: integer + example: 5 + prompt: + type: string + example: "Describe the scene" + model_name: + type: string + example: "GPT4o" + model_config: + type: object + name: + type: string + example: "My Scene Index" + ws_connection_id: + type: string + example: "conn-123" + responses: + '200': + description: Scene index created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + rtstream_index_id: + type: string + example: "scene-idx-12345" + extraction_type: + type: string + example: "time" + status: + type: string + example: "running" + prompt: + type: string + example: "Describe 
the scene" + name: + type: string + example: "My Scene Index" + + get: + summary: List RTStream scene indexes + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + responses: + '200': + description: List of scene indexes + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + scene_indexes: + type: array + items: + type: object + properties: + rtstream_index_id: + type: string + example: "scene-idx-12345" + extraction_type: + type: string + example: "time" + status: + type: string + example: "running" + prompt: + type: string + name: + type: string + + /rtstream/{stream_id}/index/{scene_index_id}: + get: + summary: Get RTStream scene index details + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + responses: + '200': + description: Scene index details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + rtstream_index_id: + type: string + example: "scene-idx-12345" + extraction_type: + type: string + example: "time" + extraction_config: + type: object + status: + type: string + example: "running" + prompt: + type: string + name: + type: string + + /rtstream/{stream_id}/index/scene/{scene_index_id}: + get: + summary: Get RTStream scene records + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + - name: page + in: query + schema: + type: 
integer + default: 1 + example: 1 + - name: page_size + in: query + schema: + type: integer + default: 100 + example: 100 + - name: start + in: query + description: Filter by start timestamp + schema: + type: number + example: 1700000000 + - name: end + in: query + description: Filter by end timestamp + schema: + type: number + example: 1700003600 + responses: + '200': + description: Scene records + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + scene_index_records: + type: array + items: + type: object + properties: + start: + type: number + example: 1700000000 + end: + type: number + example: 1700000010 + description: + type: string + example: "Scene description" + next_page: + type: boolean + example: false + + patch: + summary: Update RTStream scene index prompt + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "Updated scene description prompt" + responses: + '200': + description: Scene index prompt updated + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /rtstream/{stream_id}/index/scene/{scene_index_id}/status: + patch: + summary: Update RTStream scene index status + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - action + 
properties: + action: + type: string + enum: [start, stop] + example: "stop" + responses: + '200': + description: Scene index status updated + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /rtstream/{stream_id}/search: + post: + summary: Search RTStream scene index + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - query + - scene_index_id + properties: + query: + type: string + example: "person walking" + scene_index_id: + type: string + example: "scene-idx-12345" + result_threshold: + type: integer + default: 10 + example: 10 + score_threshold: + type: number + example: 0.5 + dynamic_score_percentage: + type: integer + default: 20 + example: 20 + stitch: + type: boolean + default: true + example: true + filter: + type: array + items: + type: object + rerank: + type: boolean + default: false + example: false + rerank_params: + type: object + responses: + '200': + description: Search results + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + results: + type: array + items: + type: object + properties: + start: + type: number + example: 1700000000 + end: + type: number + example: 1700000010 + text: + type: string + example: "matching scene text" + score: + type: number + example: 0.95 + scene_index_id: + type: string + example: "scene-idx-12345" + + /rtstream/{stream_id}/stream: + get: + summary: Get RTStream playback URL + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: start + in: query + description: Start unix timestamp for time-based retrieval + schema: + type: number + example: 
1700000000 + - name: end + in: query + description: End unix timestamp for time-based retrieval + schema: + type: number + example: 1700003600 + - name: original_frame_rate + in: query + schema: + type: integer + default: 1 + example: 1 + - name: frame_rate + in: query + schema: + type: integer + default: 1 + example: 1 + responses: + '200': + description: Stream URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/rts/12345" + + /rtstream/{stream_id}/transcription/: + get: + summary: Get RTStream transcription data + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: engine + in: query + schema: + type: string + default: "default" + example: "default" + - name: page + in: query + schema: + type: integer + default: 1 + example: 1 + - name: page_size + in: query + schema: + type: integer + default: 100 + maximum: 1000 + example: 100 + - name: start + in: query + description: Filter by start timestamp + schema: + type: number + example: 1700000000 + - name: end + in: query + description: Filter by end timestamp + schema: + type: number + example: 1700003600 + - name: since + in: query + description: Get only entries newer than this timestamp (for polling) + schema: + type: number + example: 1700000000 + responses: + '200': + description: Transcription data + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + transcription_records: + type: array + items: + type: object + properties: + start: + type: number + example: 1700000000 + end: + type: number + example: 1700000005 + text: + type: string + example: "transcribed text" + word_timestamps: + type: array + items: + type: object + 
next_page: + type: boolean + example: false + total_count: + type: integer + example: 50 + page: + type: integer + example: 1 + page_size: + type: integer + example: 100 + + post: + summary: Start or stop RTStream transcription + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - action + properties: + action: + type: string + enum: [start, stop] + example: "start" + engine: + type: string + default: "default" + example: "default" + ws_connection_id: + type: string + example: "conn-123" + responses: + '200': + description: Transcription status updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + status: + type: string + enum: [running, stopped] + example: "running" + engine: + type: string + example: "default" + updated_at: + type: string + format: date-time + + /rtstream/{stream_id}/transcription/status: + get: + summary: Get RTStream transcription status + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: engine + in: query + schema: + type: string + default: "default" + example: "default" + responses: + '200': + description: Transcription status + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + status: + type: string + enum: [running, stopped, not_configured] + example: "running" + engine: + type: string + example: "default" + language: + type: string + example: "en" + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + + /rtstream/event: + post: + summary: Create collection event + 
tags: + - RTStream + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - event_prompt + - label + properties: + event_prompt: + type: string + example: "Detect when a person enters the room" + label: + type: string + example: "person-entry" + responses: + '200': + description: Event created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + event_id: + type: string + example: "event-12345" + + get: + summary: List collection events + tags: + - RTStream + security: + - ApiKeyAuth: [] + responses: + '200': + description: List of events + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + events: + type: array + items: + type: object + properties: + event_id: + type: string + example: "event-12345" + event_prompt: + type: string + example: "Detect when a person enters the room" + label: + type: string + example: "person-entry" + + /rtstream/{stream_id}/index/{scene_index_id}/alert: + post: + summary: Create RTStream alert + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - event_id + properties: + event_id: + type: string + example: "event-12345" + callback_url: + type: string + example: "https://webhook.example.com/alert" + ws_connection_id: + type: string + example: "conn-123" + responses: + '200': + description: Alert created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + 
alert_id: + type: string + example: "alert-12345" + + get: + summary: List RTStream alerts + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + responses: + '200': + description: List of alerts + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + alerts: + type: array + items: + type: object + properties: + alert_id: + type: string + example: "alert-12345" + event_id: + type: string + example: "event-12345" + prompt: + type: string + example: "Detect when a person enters the room" + label: + type: string + example: "person-entry" + callback_url: + type: string + example: "https://webhook.example.com/alert" + status: + type: string + enum: [enabled, disabled] + example: "enabled" + + /rtstream/{stream_id}/index/{scene_index_id}/alert/{alert_id}/status: + patch: + summary: Update RTStream alert status + tags: + - RTStream + security: + - ApiKeyAuth: [] + parameters: + - name: stream_id + in: path + required: true + schema: + type: string + example: "rts-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + - name: alert_id + in: path + required: true + schema: + type: string + example: "alert-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - status + properties: + status: + type: string + enum: [enabled, disabled] + example: "disabled" + responses: + '200': + description: Alert status updated + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + +tags: + - name: Authentication + description: User authentication and API key management + - name: Collections + description: Collection management 
operations + - name: Videos + description: Video upload, processing, and management + - name: Audio + description: Audio management operations + - name: Images + description: Image management operations + - name: Search + description: Content search and indexing + - name: AI Generation + description: AI-powered content generation + - name: Billing + description: Billing and usage management + - name: RTStream + description: Real-time streaming operations + - name: Utilities + description: Utility endpoints + - name: Meeting + description: Meeting recording and management + - name: Capture + description: Capture session management for recording streams + - name: Editor + description: Timeline editor operations + - name: Transcode + description: Media transcoding operations + - name: Assets + description: Cross-collection asset listing \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1159ddd..705f032 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests==2.31.0 backoff==2.2.1 tqdm==4.66.1 +websockets>=12.0 diff --git a/setup.py b/setup.py index db2f955..8123174 100644 --- a/setup.py +++ b/setup.py @@ -28,13 +28,19 @@ long_description=long_description, long_description_content_type="text/markdown", url=about["__url__"], - packages=find_packages(exclude=["tests", "tests.*"]), + packages=find_packages( + exclude=["tests", "tests.*", "capture_bin", "videodb_capture_bin"] + ), python_requires=">=3.8", install_requires=[ "requests>=2.25.1", "backoff>=2.2.1", "tqdm>=4.66.1", + "websockets>=11.0.3", ], + extras_require={ + "capture": ["videodb-capture-bin>=0.2.7"], + }, classifiers=[ "Intended Audience :: Developers", "Programming Language :: Python :: 3", diff --git a/videodb/__about__.py b/videodb/__about__.py index 89bbe9e..e73cffb 100644 --- a/videodb/__about__.py +++ b/videodb/__about__.py @@ -2,7 +2,7 @@ -__version__ = "0.3.0" +__version__ = "0.4.0" __title__ = "videodb" __author__ = "videodb" __email__ = 
"contact@videodb.io" diff --git a/videodb/__init__.py b/videodb/__init__.py index 41244dc..a5adc9c 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -20,8 +20,15 @@ ResizeMode, VideoConfig, AudioConfig, + ReframeMode, + SegmentationType, + RTStreamChannelType, ) from videodb.client import Connection +from videodb.capture_session import CaptureSession +from videodb.websocket_client import WebSocketConnection +from videodb.capture import CaptureClient, Channel, AudioChannel, VideoChannel, Channels, ChannelList + from videodb.exceptions import ( VideodbError, AuthenticationError, @@ -33,6 +40,15 @@ __all__ = [ + "connect", + "CaptureSession", + "WebSocketConnection", + "CaptureClient", + "Channel", + "AudioChannel", + "VideoChannel", + "Channels", + "ChannelList", "VideodbError", "AuthenticationError", "InvalidRequestError", @@ -51,11 +67,15 @@ "ResizeMode", "VideoConfig", "AudioConfig", + "ReframeMode", + "SegmentationType", + "RTStreamChannelType", ] def connect( api_key: str = None, + session_token: str = None, base_url: Optional[str] = VIDEO_DB_API, log_level: Optional[int] = logging.INFO, **kwargs, @@ -63,6 +83,7 @@ def connect( """A client for interacting with a videodb via REST API :param str api_key: The api key to use for authentication + :param str session_token: The session token to use for authentication (alternative to api_key) :param str base_url: (optional) The base url to use for the api :param int log_level: (optional) The log level to use for the logger :return: A connection object @@ -70,11 +91,14 @@ def connect( """ logger.setLevel(log_level) - if api_key is None: + + # Determine which token to use + if api_key is None and session_token is None: api_key = os.environ.get("VIDEO_DB_API_KEY") - if api_key is None: + + if api_key is None and session_token is None: raise AuthenticationError( - "No API key provided. Set an API key either as an environment variable (VIDEO_DB_API_KEY) or pass it as an argument." + "No authentication provided. 
Set an API key (VIDEO_DB_API_KEY) or provide api_key/session_token as an argument." ) - return Connection(api_key, base_url, **kwargs) + return Connection(api_key=api_key, session_token=session_token, base_url=base_url, **kwargs) diff --git a/videodb/_constants.py b/videodb/_constants.py index 0654846..287b10e 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -1,17 +1,24 @@ """Constants used in the videodb package.""" +from enum import Enum from typing import Union from dataclasses import dataclass VIDEO_DB_API: str = "https://api.videodb.io" -class MediaType: +class MediaType(str, Enum): video = "video" audio = "audio" image = "image" +class RTStreamChannelType: + mic = "mic" + screen = "screen" + system_audio = "system_audio" + + class SearchType: semantic = "semantic" keyword = "keyword" @@ -27,6 +34,7 @@ class IndexType: class SceneExtractionType: shot_based = "shot" time_based = "time" + transcript = "transcript" class Workflows: @@ -49,6 +57,11 @@ class Segmenter: sentence = "sentence" +class SegmentationType: + sentence = "sentence" + llm = "llm" + + class ApiPath: collection = "collection" upload = "upload" @@ -91,6 +104,12 @@ class ApiPath: record = "record" editor = "editor" reframe = "reframe" + clip = "clip" + capture = "capture" + session = "session" + token = "token" + websocket = "websocket" + export = "export" class Status: @@ -101,6 +120,7 @@ class Status: class MeetingStatus: initializing = "initializing" processing = "processing" + joined = "joined" done = "done" diff --git a/videodb/_upload.py b/videodb/_upload.py index 399d527..ebb937c 100644 --- a/videodb/_upload.py +++ b/videodb/_upload.py @@ -29,6 +29,7 @@ def upload( callback_url: Optional[str] = None, file_path: Optional[str] = None, url: Optional[str] = None, + collection_id: Optional[str] = None, ) -> dict: """Upload a file or URL. 
@@ -40,9 +41,12 @@ def upload( :param str callback_url: URL to receive the callback (optional) :param str file_path: Path to the file to be uploaded :param str url: URL of the file to be uploaded + :param str collection_id: ID of the collection to upload to (optional) :return: Dictionary containing upload response data :rtype: dict """ + collection_id = collection_id or _connection.collection_id + if source and (file_path or url): raise VideodbError("source cannot be used with file_path or url") @@ -68,7 +72,7 @@ def upload( try: name = file_path.split("/")[-1].split(".")[0] if not name else name upload_url_data = _connection.get( - path=f"{ApiPath.collection}/{_connection.collection_id}/{ApiPath.upload_url}", + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.upload_url}", params={"name": name}, ) upload_url = upload_url_data.get("upload_url") @@ -85,7 +89,7 @@ def upload( raise VideodbError("Error while uploading file", cause=e) upload_data = _connection.post( - path=f"{ApiPath.collection}/{_connection.collection_id}/{ApiPath.upload}", + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.upload}", data={ "url": url, "name": name, diff --git a/videodb/capture.py b/videodb/capture.py new file mode 100644 index 0000000..6bad759 --- /dev/null +++ b/videodb/capture.py @@ -0,0 +1,406 @@ +import logging +import asyncio +import json +import uuid +import os +from typing import Optional, Dict, List, Any + +from videodb._constants import VIDEO_DB_API + +logger = logging.getLogger(__name__) + +def get_recorder_path(): + """ + Attempts to find the path to the recorder binary. + If the optional 'videodb-capture-bin' package is not installed, + it raises a RuntimeError with instructions. 
+ """ + try: + import videodb_capture_bin + return videodb_capture_bin.get_binary_path() + except ImportError: + error_msg = ( + "Capture runtime not found.\n" + "To use recording features, please install the capture dependencies:\n" + "pip install 'videodb[capture]'" + ) + logger.error(error_msg) + raise RuntimeError(error_msg) + except Exception as e: + logger.error(f"Failed to resolve recorder path: {e}") + raise + + +class Channel: + """Base class for capture channels.""" + + def __init__( + self, + id: str, + name: str, + type: str, + client: Optional["CaptureClient"] = None, + ): + """Object representing a capture channel. + + :param str id: The unique ID of the channel. + :param str name: The display name of the channel. + :param str type: The type of the channel (audio/video). + :param CaptureClient client: Reference to the capture client. + """ + self.id = id + self.name = name + self.type = type + self._client = client + self.store = False + + def __repr__(self): + return f"Channel(id={self.id}, name={self.name}, type={self.type})" + + async def pause(self) -> None: + """Pause recording for this channel.""" + if not self._client: + raise RuntimeError("Channel not bound to a CaptureClient") + + track_map = { + "audio": "mic" if "mic" in self.id else "system_audio", + "video": "screen", + } + track = track_map.get(self.type) + if track: + await self._client._send_command("pauseTracks", {"tracks": [track]}) + + async def resume(self) -> None: + """Resume recording for this channel.""" + if not self._client: + raise RuntimeError("Channel not bound to a CaptureClient") + + track_map = { + "audio": "mic" if "mic" in self.id else "system_audio", + "video": "screen", + } + track = track_map.get(self.type) + if track: + await self._client._send_command("resumeTracks", {"tracks": [track]}) + + def to_dict(self) -> Dict[str, Any]: + """Return dictionary representation of the channel.""" + return { + "channel_id": self.id, + "type": self.type, + "name": self.name, + 
"record": True, + "store": self.store, + } + + +class AudioChannel(Channel): + """Represents an audio source channel.""" + + def __init__(self, id: str, name: str, client: Optional["CaptureClient"] = None): + super().__init__(id, name, type="audio", client=client) + + def __repr__(self): + return f"AudioChannel(id={self.id}, name={self.name})" + + +class VideoChannel(Channel): + """Represents a video source channel.""" + + def __init__(self, id: str, name: str, client: Optional["CaptureClient"] = None): + super().__init__(id, name, type="video", client=client) + + def __repr__(self): + return f"VideoChannel(id={self.id}, name={self.name})" + + +class ChannelList(list): + """List subclass with a default property for channel collections.""" + + @property + def default(self) -> Optional[Channel]: + """Get the first (default) channel, or None if empty.""" + return self[0] if self else None + + +class Channels: + """Container for available channels, grouped by type.""" + + def __init__( + self, + mics: List[AudioChannel] = None, + displays: List[VideoChannel] = None, + system_audio: List[AudioChannel] = None, + ): + self.mics: ChannelList = ChannelList(mics or []) + self.displays: ChannelList = ChannelList(displays or []) + self.system_audio: ChannelList = ChannelList(system_audio or []) + + def __repr__(self): + return ( + f"Channels(" + f"mics={len(self.mics)}, " + f"displays={len(self.displays)}, " + f"system_audio={len(self.system_audio)})" + ) + + def all(self) -> List[Channel]: + """Return a flat list of all channels.""" + return list(self.mics) + list(self.displays) + list(self.system_audio) + + +class CaptureClient: + """Client for managing local capture sessions.""" + + def __init__( + self, + client_token: str, + base_url: Optional[str] = None, + ): + """Initialize the capture client. + + :param str client_token: Client token for the capture session. + :param str base_url: VideoDB API endpoint URL. 
+ """ + self.client_token = client_token + self.base_url = base_url or os.environ.get("VIDEO_DB_API", VIDEO_DB_API) + self._session_id: Optional[str] = None + self._proc = None + self._futures: Dict[str, asyncio.Future] = {} + self._binary_path = get_recorder_path() + self._event_queue = asyncio.Queue() + + def __repr__(self) -> str: + return f"CaptureClient(base_url={self.base_url})" + + async def _ensure_process(self): + """Ensure the recorder binary is running.""" + if self._proc is not None and self._proc.returncode is None: + return + + self._proc = await asyncio.create_subprocess_exec( + self._binary_path, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + asyncio.create_task(self._read_stdout_loop()) + asyncio.create_task(self._read_stderr_loop()) + + await self._send_command("init", {"apiUrl": self.base_url}) + + + async def _send_command( + self, command: str, params: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """Send a command to the recorder binary and await response. + + :param str command: Command name. + :param dict params: Command parameters. + :return: Response result. 
+ :rtype: dict + """ + await self._ensure_process() + + command_id = str(uuid.uuid4()) + payload = { + "command": command, + "commandId": command_id, + "params": params or {}, + } + + # Framing: videodb_recorder|\n + message = f"videodb_recorder|{json.dumps(payload)}\n" + self._proc.stdin.write(message.encode("utf-8")) + await self._proc.stdin.drain() + + # Create future to await response + loop = asyncio.get_running_loop() + future = loop.create_future() + self._futures[command_id] = future + + try: + return await future + finally: + self._futures.pop(command_id, None) + + async def _read_stdout_loop(self): + """Loop to read stdout and process messages.""" + while True: + line = await self._proc.stdout.readline() + if not line: + break + + line_str = line.decode("utf-8", errors="replace").strip() + if not line_str.startswith("videodb_recorder|"): + continue + + try: + json_part = line_str.split("|", 1)[1] + data = json.loads(json_part) + + msg_type = data.get("type") + if msg_type == "response": + cmd_id = data.get("commandId") + if cmd_id in self._futures: + if data.get("status") == "success": + self._futures[cmd_id].set_result(data.get("result")) + else: + self._futures[cmd_id].set_exception( + RuntimeError(data.get("result", "Unknown error")) + ) + elif msg_type == "event": + await self._event_queue.put(data) + + except Exception as e: + logger.error(f"Failed to parse recorder message: {e}") + + async def _read_stderr_loop(self): + """Loop to read stderr and log messages.""" + while True: + line = await self._proc.stderr.readline() + if not line: + break + logger.debug(f"[Recorder Binary]: {line.decode('utf-8', errors='replace').strip()}") + + async def shutdown(self): + """Cleanly terminate the recorder binary process.""" + if self._proc: + try: + # Try graceful shutdown command first + await self._send_command("shutdown") + except Exception: + pass + + try: + self._proc.terminate() + await self._proc.wait() + except Exception: + pass + self._proc = None + + # 
Valid permission types + VALID_PERMISSIONS = {"microphone", "screen_capture"} + + async def request_permission(self, kind: str) -> bool: + """Request system permissions. + + :param str kind: One of "microphone", "screen_capture" + :return: True if granted, False if denied + :raises ValueError: If kind is not a valid permission type + """ + # Validate permission type + if kind not in self.VALID_PERMISSIONS: + raise ValueError( + f"Invalid permission type: '{kind}'. " + f"Valid types: {', '.join(sorted(self.VALID_PERMISSIONS))}" + ) + + # Map python-friendly names to binary-expected names + # e.g. "screen_capture" -> "screen-capture" + permission_map = { + "screen_capture": "screen-capture", + } + binary_kind = permission_map.get(kind, kind) + result = await self._send_command("requestPermission", {"permission": binary_kind}) + + # Binary returns {"requested": True} to confirm the request was initiated + # or may return {"status": "granted"} if already granted. + if result.get("requested") is True: + return True + + status = result.get("status") + if status == "granted": + return True + elif status == "denied": + logger.warning(f"Permission '{kind}' was denied.") + return False + + return False + + async def list_channels(self) -> Channels: + """Query the system for available audio and video channels. + + :return: Channels object with grouped collections (mics, displays, system_audio). 
+ :rtype: Channels + """ + response = await self._send_command("getChannels") + raw_channels = response.get("channels", []) + + mics = [] + displays = [] + system_audio = [] + + for ch in raw_channels: + c_type = ch.get("type") + c_id = ch.get("channel_id") or ch.get("id") + c_name = ch.get("name", "") + + if not c_id: + logger.warning(f"Skipping channel with missing ID: {ch}") + continue + + # Categorize based on type and name patterns + if c_type == "video": + displays.append(VideoChannel(id=c_id, name=c_name, client=self)) + elif c_type == "audio": + # Distinguish between mic and system audio based on common patterns + name_lower = c_name.lower() + if "system" in name_lower or "output" in name_lower or "speaker" in name_lower: + system_audio.append(AudioChannel(id=c_id, name=c_name, client=self)) + else: + mics.append(AudioChannel(id=c_id, name=c_name, client=self)) + else: + logger.debug(f"Unknown channel type '{c_type}' for channel '{c_name}'") + + return Channels(mics=mics, displays=displays, system_audio=system_audio) + + async def start_session( + self, + capture_session_id: str, + channels: List[Channel], + primary_video_channel_id: Optional[str] = None, + ) -> None: + """Start the recording session. + + :param str capture_session_id: The ID of the capture session. + :param list[Channel] channels: List of Channel objects to record. + :param str primary_video_channel_id: ID of the primary video channel. + :raises ValueError: If no channels are specified. 
+ """ + if not channels: + raise ValueError("At least one channel must be specified for capture.") + + self._session_id = capture_session_id + + payload = { + "sessionId": capture_session_id, + "uploadToken": self.client_token, + "channels": [ch.to_dict() for ch in channels], + } + + if primary_video_channel_id: + payload["primary_video_channel_id"] = primary_video_channel_id + + await self._send_command("startRecording", payload) + + async def stop_session(self) -> None: + """Stop the current recording session.""" + if not self._session_id: + raise RuntimeError("No active capture session to stop.") + await self._send_command("stopRecording", {"sessionId": self._session_id}) + + async def events(self): + """Async generator that yields events from the recorder.""" + while True: + try: + # Use a timeout so we can check if the process is still alive + event = await asyncio.wait_for(self._event_queue.get(), timeout=1.0) + yield event + except asyncio.TimeoutError: + if self._proc is None or self._proc.returncode is not None: + break + continue + except Exception: + break diff --git a/videodb/capture_session.py b/videodb/capture_session.py new file mode 100644 index 0000000..12671b9 --- /dev/null +++ b/videodb/capture_session.py @@ -0,0 +1,61 @@ +from typing import List +from videodb.rtstream import RTStream + + +class CaptureSession: + """CaptureSession class representing a capture session. 
+ + :ivar str id: Unique identifier for the session + :ivar str collection_id: ID of the collection this session belongs to + :ivar str end_user_id: ID of the end user + :ivar str client_id: Client-provided session ID + :ivar str status: Current status of the session + """ + + def __init__(self, _connection, id: str, collection_id: str, **kwargs) -> None: + self._connection = _connection + self.id = id + self.collection_id = collection_id + self._update_attributes(kwargs) + + def __repr__(self) -> str: + return ( + f"CaptureSession(" + f"id={self.id}, " + f"status={getattr(self, 'status', None)}, " + f"collection_id={self.collection_id}, " + f"end_user_id={getattr(self, 'end_user_id', None)})" + ) + + def _update_attributes(self, data: dict) -> None: + """Update instance attributes from API response data.""" + self.end_user_id = data.get("end_user_id") + self.client_id = data.get("client_id") + self.status = data.get("status") + self.callback_url = data.get("callback_url") + self.exported_video_id = data.get("exported_video_id") + self.metadata = data.get("metadata", {}) + + self.rtstreams = [] + for rts_data in data.get("rtstreams", []): + if not isinstance(rts_data, dict): + continue + stream = RTStream(self._connection, **rts_data) + self.rtstreams.append(stream) + + def get_rtstream(self, category: str) -> List[RTStream]: + """Get list of RTStreams by category. + + :param str category: Category to filter by. Use :class:`RTStreamChannelType` constants: + ``RTStreamChannelType.mic``, ``RTStreamChannelType.screen``, ``RTStreamChannelType.system_audio``. 
+ :return: List of :class:`RTStream ` objects + :rtype: List[:class:`videodb.rtstream.RTStream`] + """ + filtered_streams = [] + + for stream in self.rtstreams: + channel_id = getattr(stream, "channel_id", "") or "" + if str(channel_id).lower() == category.lower(): + filtered_streams.append(stream) + + return filtered_streams diff --git a/videodb/client.py b/videodb/client.py index a01d10c..7eac7bb 100644 --- a/videodb/client.py +++ b/videodb/client.py @@ -19,6 +19,8 @@ from videodb.audio import Audio from videodb.image import Image from videodb.meeting import Meeting +from videodb.capture_session import CaptureSession +from videodb.websocket_client import WebSocketConnection from videodb._upload import ( upload, @@ -30,23 +32,28 @@ class Connection(HttpClient): """Connection class to interact with the VideoDB""" - def __init__(self, api_key: str, base_url: str, **kwargs) -> "Connection": + def __init__(self, api_key: str = None, session_token: str = None, base_url: str = None, **kwargs) -> "Connection": """Initializes a new instance of the Connection class with specified API credentials. Note: Users should not initialize this class directly. 
Instead use :meth:`videodb.connect() ` :param str api_key: API key for authentication + :param str session_token: Session token for authentication (alternative to api_key) :param str base_url: Base URL of the VideoDB API - :raise ValueError: If the API key is not provided + :raise ValueError: If neither API key nor session token is provided :return: :class:`Connection ` object, to interact with the VideoDB :rtype: :class:`videodb.client.Connection` """ + # Use whichever token is provided + access_token = api_key or session_token + self.api_key = api_key + self.session_token = session_token self.base_url = base_url self.collection_id = "default" super().__init__( - api_key=api_key, base_url=base_url, version=__version__, **kwargs + api_key=access_token, base_url=base_url, version=__version__, **kwargs ) def get_collection(self, collection_id: Optional[str] = "default") -> Collection: @@ -347,3 +354,143 @@ def get_meeting(self, meeting_id: str) -> Meeting: meeting = Meeting(self, id=meeting_id, collection_id="default") meeting.refresh() return meeting + + def create_capture_session( + self, + end_user_id: str, + collection_id: str = "default", + callback_url: str = None, + ws_connection_id: str = None, + metadata: dict = None, + ) -> CaptureSession: + """Create a capture session. 
+ + :param str end_user_id: ID of the end user + :param str collection_id: ID of the collection (default: "default") + :param str callback_url: URL to receive callback (optional) + :param str ws_connection_id: WebSocket connection ID (optional) + :param dict metadata: Custom metadata (optional) + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + data = { + "end_user_id": end_user_id, + } + if callback_url: + data["callback_url"] = callback_url + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + if metadata: + data["metadata"] = metadata + + response = self.post( + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.capture}/{ApiPath.session}", + data=data, + ) + # Extract id and collection_id from response to avoid duplicate arguments + session_id = response.pop("session_id", None) or response.pop("id", None) + response_collection_id = response.pop("collection_id", collection_id) + return CaptureSession( + self, id=session_id, collection_id=response_collection_id, **response + ) + + def get_capture_session( + self, session_id: str, collection_id: str = "default" + ) -> CaptureSession: + """Get a capture session by its ID. 
+ + :param str session_id: ID of the capture session + :param str collection_id: ID of the collection (default: "default") + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + response = self.get( + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.capture}/{ApiPath.session}/{session_id}" + ) + + # If response is wrapped in 'data', extract it + if "data" in response and isinstance(response["data"], dict): + response = response["data"] + + # Normalize rtstreams before passing to CaptureSession + for rts in response.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = collection_id + + # Extract id and collection_id from response to avoid duplicate arguments + response.pop("id", None) # Remove id from response + response.pop("collection_id", None) # Remove collection_id from response + + return CaptureSession( + self, id=session_id, collection_id=collection_id, **response + ) + + def list_capture_sessions( + self, + collection_id: str = "default", + status: str = None, + ) -> list[CaptureSession]: + """List capture sessions. 
+ + :param str collection_id: ID of the collection (default: "default") + :param str status: Filter sessions by status (optional) + :return: List of :class:`CaptureSession ` objects + :rtype: list[:class:`videodb.capture_session.CaptureSession`] + """ + params = {} + if status: + params["status"] = status + + response = self.get( + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.capture}/{ApiPath.session}", + params=params, + ) + + sessions = [] + for session_data in response.get("sessions", []): + session_id = session_data.pop("id", None) or session_data.pop( + "session_id", None + ) + # Normalize rtstreams + for rts in session_data.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = collection_id + # Remove collection_id from data + session_data.pop("collection_id", None) + sessions.append( + CaptureSession( + self, id=session_id, collection_id=collection_id, **session_data + ) + ) + return sessions + + def connect_websocket(self, collection_id: str = "default") -> WebSocketConnection: + """Connect to the VideoDB WebSocket service. + + :param str collection_id: ID of the collection (default: "default") + :return: :class:`WebSocketConnection ` object + :rtype: :class:`videodb.websocket_client.WebSocketConnection` + """ + path = f"{ApiPath.collection}/{collection_id}/{ApiPath.websocket}" + response = self.get(path=path) + websocket_url = response.get("websocket_url") + return WebSocketConnection(url=websocket_url) + + def generate_client_token(self, expires_in: int = 86400) -> str: + """Generate a client token for capture operations. 
+ + :param int expires_in: Expiration time in seconds (default: 86400) + :return: Client token string + :rtype: str + """ + response = self.post( + path=f"{ApiPath.capture}/{ApiPath.session}/{ApiPath.token}", + data={"expires_in": expires_in}, + ) + return response.get("token") diff --git a/videodb/collection.py b/videodb/collection.py index 994cda6..2ca76d6 100644 --- a/videodb/collection.py +++ b/videodb/collection.py @@ -7,13 +7,15 @@ from videodb._constants import ( ApiPath, IndexType, + MediaType, SearchType, ) from videodb.video import Video from videodb.audio import Audio from videodb.image import Image from videodb.meeting import Meeting -from videodb.rtstream import RTStream +from videodb.capture_session import CaptureSession +from videodb.rtstream import RTStream, RTStreamSearchResult, RTStreamShot from videodb.search import SearchFactory, SearchResult logger = logging.getLogger(__name__) @@ -167,23 +169,56 @@ def delete_image(self, image_id: str) -> None: ) def connect_rtstream( - self, url: str, name: str, sample_rate: int = None + self, + url: str, + name: str, + media_types: List[str] = None, + sample_rate: int = None, + store: bool = None, + enable_transcript: bool = None, + ws_connection_id: str = None, ) -> RTStream: """Connect to an rtstream. :param str url: URL of the rtstream :param str name: Name of the rtstream - :param int sample_rate: Sample rate of the rtstream (optional) + :param list media_types: List of media types to capture (default: [MediaType.video]). + Valid values: :attr:`MediaType.audio`, :attr:`MediaType.video` + :param int sample_rate: Sample rate of the rtstream (optional, server default: 30) + :param bool store: Enable recording storage (optional, default: False). + When True, the stream recording is stored and can be exported via :meth:`RTStream.export`. 
+ :param bool enable_transcript: Enable real-time transcription (optional) + :param str ws_connection_id: WebSocket connection ID for receiving events (optional) :return: :class:`RTStream ` object """ + if media_types is None: + media_types = [MediaType.video] + + valid = {MediaType.audio, MediaType.video} + invalid = set(media_types) - valid + if invalid or not media_types: + raise ValueError( + f"Invalid media_types: {invalid}. Valid values: {MediaType.audio}, {MediaType.video}" + ) + + data = { + "collection_id": self.id, + "url": url, + "name": name, + "media_types": media_types, + } + if sample_rate is not None: + data["sample_rate"] = sample_rate + if store is not None: + data["store"] = store + if enable_transcript is not None: + data["enable_transcript"] = enable_transcript + if ws_connection_id is not None: + data["ws_connection_id"] = ws_connection_id + rtstream_data = self._connection.post( path=f"{ApiPath.rtstream}", - data={ - "collection_id": self.id, - "url": url, - "name": name, - "sample_rate": sample_rate, - }, + data=data, ) return RTStream(self._connection, **rtstream_data) @@ -199,14 +234,34 @@ def get_rtstream(self, id: str) -> RTStream: ) return RTStream(self._connection, **rtstream_data) - def list_rtstreams(self) -> List[RTStream]: + def list_rtstreams( + self, + limit: Optional[int] = None, + offset: Optional[int] = None, + status: Optional[str] = None, + name: Optional[str] = None, + ordering: Optional[str] = None, + ) -> List[RTStream]: """List all rtstreams in the collection. 
+ :param int limit: Number of rtstreams to return (optional) + :param int offset: Number of rtstreams to skip (optional) + :param str status: Filter by status (optional) + :param str name: Filter by name (optional) + :param str ordering: Order results by field (optional) :return: List of :class:`RTStream ` objects :rtype: List[:class:`videodb.rtstream.RTStream`] """ + params = { + "limit": limit, + "offset": offset, + "status": status, + "name": name, + "ordering": ordering, + } rtstreams_data = self._connection.get( path=f"{ApiPath.rtstream}", + params={key: value for key, value in params.items() if value is not None}, ) return [ RTStream(self._connection, **rtstream) @@ -413,7 +468,9 @@ def search( score_threshold: Optional[float] = None, dynamic_score_percentage: Optional[float] = None, filter: List[Dict[str, Any]] = [], - ) -> SearchResult: + namespace: Optional[str] = None, + scene_index_id: Optional[str] = None, + ) -> Union[SearchResult, RTStreamSearchResult]: """Search for a query in the collection. 
:param str query: Query to search for @@ -422,10 +479,50 @@ def search( :param int result_threshold: Number of results to return (optional) :param float score_threshold: Threshold score for the search (optional) :param float dynamic_score_percentage: Percentage of dynamic score to consider (optional) + :param list filter: Additional metadata filters (optional) + :param str namespace: Search namespace (optional, "rtstream" to search RTStreams) + :param str scene_index_id: Filter by specific scene index (optional) :raise SearchError: If the search fails - :return: :class:`SearchResult ` object - :rtype: :class:`videodb.search.SearchResult` + :return: :class:`SearchResult ` or + :class:`RTStreamSearchResult ` object + :rtype: Union[:class:`videodb.search.SearchResult`, + :class:`videodb.rtstream.RTStreamSearchResult`] """ + if namespace == "rtstream": + data = {"query": query} + if scene_index_id is not None: + data["scene_index_id"] = scene_index_id + if result_threshold is not None: + data["result_threshold"] = result_threshold + if score_threshold is not None: + data["score_threshold"] = score_threshold + if dynamic_score_percentage is not None: + data["dynamic_score_percentage"] = dynamic_score_percentage + if filter is not None: + data["filter"] = filter + + search_data = self._connection.post( + path=f"{ApiPath.rtstream}/{ApiPath.collection}/{self.id}/{ApiPath.search}", + data=data, + ) + results = search_data.get("results", []) + shots = [ + RTStreamShot( + _connection=self._connection, + rtstream_id=result.get("rtstream_id") or result.get("id"), + rtstream_name=result.get("rtstream_name"), + start=result.get("start"), + end=result.get("end"), + text=result.get("text"), + search_score=result.get("score"), + scene_index_id=result.get("scene_index_id"), + scene_index_name=result.get("scene_index_name"), + metadata=result.get("metadata"), + ) + for result in results + ] + return RTStreamSearchResult(collection_id=self.id, shots=shots) + search = 
SearchFactory(self._connection).get_search(search_type) return search.search_inside_collection( collection_id=self.id, @@ -482,6 +579,7 @@ def upload( callback_url=callback_url, file_path=file_path, url=url, + collection_id=self.id, ) media_id = upload_data.get("id", "") if media_id.startswith("m-"): @@ -565,3 +663,111 @@ def get_meeting(self, meeting_id: str) -> Meeting: meeting = Meeting(self._connection, id=meeting_id, collection_id=self.id) meeting.refresh() return meeting + + def create_capture_session( + self, + end_user_id: str, + callback_url: str = None, + ws_connection_id: str = None, + metadata: dict = None, + ) -> "CaptureSession": + """Create a capture session. + + :param str end_user_id: ID of the end user + :param str callback_url: URL to receive callback (optional) + :param str ws_connection_id: WebSocket connection ID (optional) + :param dict metadata: Custom metadata (optional) + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + data = { + "end_user_id": end_user_id, + } + if callback_url: + data["callback_url"] = callback_url + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + if metadata: + data["metadata"] = metadata + + response = self._connection.post( + path=f"{ApiPath.collection}/{self.id}/{ApiPath.capture}/{ApiPath.session}", + data=data, + ) + # Normalize rtstreams before passing to CaptureSession + for rts in response.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = self.id + # Extract id and collection_id from response to avoid duplicate arguments + session_id = response.pop("session_id", None) or response.pop("id", None) + response.pop("collection_id", None) + return CaptureSession( + self._connection, id=session_id, collection_id=self.id, **response + ) + + def get_capture_session(self, session_id: str) -> 
"CaptureSession": + """Get a capture session by its ID. + + :param str session_id: ID of the capture session + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + response = self._connection.get( + path=f"{ApiPath.collection}/{self.id}/{ApiPath.capture}/{ApiPath.session}/{session_id}" + ) + # Normalize rtstreams before passing to CaptureSession + for rts in response.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = self.id + # Extract id and collection_id from response to avoid duplicate arguments + response.pop("id", None) + response.pop("collection_id", None) + return CaptureSession( + self._connection, id=session_id, collection_id=self.id, **response + ) + + def list_capture_sessions(self, status: str = None) -> list["CaptureSession"]: + """List capture sessions. + + :param str status: Filter sessions by status (optional) + :return: List of :class:`CaptureSession ` objects + :rtype: list[:class:`videodb.capture_session.CaptureSession`] + """ + params = {} + if status: + params["status"] = status + + response = self._connection.get( + path=f"{ApiPath.collection}/{self.id}/{ApiPath.capture}/{ApiPath.session}", + params=params, + ) + + sessions = [] + for session_data in response.get("sessions", []): + session_id = session_data.pop("id", None) or session_data.pop( + "session_id", None + ) + # Normalize rtstreams + for rts in session_data.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = self.id + # Remove collection_id from data + session_data.pop("collection_id", None) + sessions.append( + CaptureSession( + self._connection, + id=session_id, + collection_id=self.id, + **session_data, + ) + ) + return sessions diff --git 
a/videodb/editor.py b/videodb/editor.py index 4c3ae25..5c40282 100644 --- a/videodb/editor.py +++ b/videodb/editor.py @@ -1,7 +1,14 @@ +import json +import requests + from typing import List, Optional, Union from enum import Enum from videodb._constants import ApiPath +from videodb.exceptions import InvalidRequestError + + +MAX_PAYLOAD_SIZE = 100 * 1024 class AssetType(str, Enum): @@ -349,7 +356,6 @@ class Font: :ivar int size: Font size in pixels :ivar str color: Font color in hex format (e.g., "#FFFFFF") :ivar float opacity: Font opacity (0.0 to 1.0) - :ivar int weight: (optional) Font weight (100 to 900) """ def __init__( @@ -358,7 +364,6 @@ def __init__( size: int = 48, color: str = "#FFFFFF", opacity: float = 1.0, - weight: Optional[int] = None, ): """Initialize a Font instance. @@ -366,21 +371,17 @@ def __init__( :param int size: Font size in pixels (default: 48) :param str color: Font color in hex format (default: "#FFFFFF") :param float opacity: Font opacity between 0.0 and 1.0 (default: 1.0) - :param int weight: (optional) Font weight between 100 and 900 - :raises ValueError: If size < 1, opacity not in [0.0, 1.0], or weight not in [100, 900] + :raises ValueError: If size < 1, opacity not in [0.0, 1.0] """ if size < 1: raise ValueError("size must be at least 1") if not (0.0 <= opacity <= 1.0): raise ValueError("opacity must be between 0.0 and 1.0") - if weight is not None and not (100 <= weight <= 900): - raise ValueError("weight must be between 100 and 900") self.family = family self.size = size self.color = color self.opacity = opacity - self.weight = weight def to_json(self) -> dict: """Convert the font settings to a JSON-serializable dictionary. @@ -394,8 +395,6 @@ def to_json(self) -> dict: "color": self.color, "opacity": self.opacity, } - if self.weight is not None: - data["weight"] = self.weight return data @@ -1100,17 +1099,61 @@ def generate_stream(self) -> str: Makes an API request to render the timeline and generate streaming URLs. 
Updates the stream_url and player_url instance variables. + If the timeline data exceeds the max payload size, it will be uploaded + as a file first to avoid HTTP content length limits. + :return: The stream URL of the generated video :rtype: str """ - stream_data = self.connection.post( - path=ApiPath.editor, - data=self.to_json(), - ) + timeline_data = self.to_json() + json_str = json.dumps(timeline_data) + payload_size = len(json_str.encode("utf-8")) + + if payload_size > MAX_PAYLOAD_SIZE: + # Upload timeline data as a file to avoid HTTP content length limits + timeline_url = self._upload_timeline_data(json_str) + stream_data = self.connection.post( + path=ApiPath.editor, + data={"timeline_url": timeline_url}, + ) + else: + stream_data = self.connection.post( + path=ApiPath.editor, + data=timeline_data, + ) + self.stream_url = stream_data.get("stream_url") self.player_url = stream_data.get("player_url") return stream_data.get("stream_url", None) + def _upload_timeline_data(self, json_str: str) -> str: + """Upload timeline JSON data as a file and return the URL. + + :param str json_str: The JSON string of timeline data to upload + :return: The URL of the uploaded file + :rtype: str + :raises InvalidRequestError: If upload fails + """ + # Get a presigned upload URL + upload_url_data = self.connection.get( + path=f"{ApiPath.collection}/{self.connection.collection_id}/{ApiPath.upload_url}", + params={"name": "timeline_data.json"}, + ) + upload_url = upload_url_data.get("upload_url") + + # Upload the JSON data as a file + try: + files = {"file": ("timeline_data.json", json_str, "application/json")} + response = requests.post(upload_url, files=files) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise InvalidRequestError( + f"Failed to upload timeline data: {str(e)}", + getattr(e, "response", None), + ) from None + + return upload_url + def download_stream(self, stream_url: str) -> dict: """Download a stream from the timeline. 
diff --git a/videodb/meeting.py b/videodb/meeting.py index 827f5c6..6b4e0cb 100644 --- a/videodb/meeting.py +++ b/videodb/meeting.py @@ -29,7 +29,15 @@ def __init__(self, _connection, id: str, collection_id: str, **kwargs) -> None: self._update_attributes(kwargs) def __repr__(self) -> str: - return f"Meeting(id={self.id}, collection_id={self.collection_id}, meeting_title={self.meeting_title}, status={self.status}, bot_name={self.bot_name}, meeting_url={self.meeting_url})" + return ( + f"Meeting(" + f"id={self.id}, " + f"collection_id={self.collection_id}, " + f"meeting_title={self.meeting_title}, " + f"status={self.status}, " + f"bot_name={self.bot_name}, " + f"meeting_url={self.meeting_url})" + ) def _update_attributes(self, data: dict) -> None: """Update instance attributes from API response data. diff --git a/videodb/rtstream.py b/videodb/rtstream.py index 4be4a8c..425b73f 100644 --- a/videodb/rtstream.py +++ b/videodb/rtstream.py @@ -1,7 +1,163 @@ +from typing import Optional, List, Dict, Any + from videodb._constants import ( ApiPath, SceneExtractionType, + Segmenter, ) +from videodb._utils._video import play_stream + + +class RTStreamSearchResult: + """RTStreamSearchResult class to interact with rtstream search results + + :ivar str collection_id: ID of the collection this rtstream belongs to + :ivar List[RTStreamShot] shots: List of shots in the search result + """ + + def __init__( + self, + collection_id: str, + shots: List["RTStreamShot"], + ) -> None: + self.collection_id = collection_id + self.shots = shots + + def __repr__(self) -> str: + return ( + f"RTStreamSearchResult(" + f"collection_id={self.collection_id}, " + f"shots={len(self.shots)})" + ) + + def get_shots(self) -> List["RTStreamShot"]: + """Get the list of shots from the search result. 
+ + :return: List of :class:`RTStreamShot ` objects + :rtype: List[:class:`videodb.rtstream.RTStreamShot`] + """ + return self.shots + + +class RTStreamExportResult: + """Result of exporting an RTStream recording. + + :ivar str video_id: ID of the exported video or audio asset + :ivar str stream_url: URL to stream the exported asset (may be None for audio) + :ivar str player_url: URL to play the exported asset in a player (may be None for audio) + :ivar str name: Name of the exported recording + :ivar float duration: Duration of the exported recording in seconds (may be None on idempotent calls) + """ + + def __init__( + self, + video_id: str, + stream_url: Optional[str] = None, + player_url: Optional[str] = None, + name: Optional[str] = None, + duration: Optional[float] = None, + ) -> None: + self.video_id = video_id + self.stream_url = stream_url + self.player_url = player_url + self.name = name + self.duration = duration + + def __repr__(self) -> str: + return ( + f"RTStreamExportResult(" + f"video_id={self.video_id}, " + f"name={self.name}, " + f"duration={self.duration})" + ) + + +class RTStreamShot: + """RTStreamShot class for rtstream search results + + :ivar str rtstream_id: ID of the rtstream + :ivar str rtstream_name: Name of the rtstream + :ivar float start: Start time in Unix timestamp + :ivar float end: End time in Unix timestamp + :ivar str text: Text content of the shot + :ivar float search_score: Search relevance score + :ivar str scene_index_id: ID of the scene index (optional) + :ivar str scene_index_name: Name of the scene index (optional) + :ivar dict metadata: Additional metadata (optional) + :ivar str stream_url: URL to stream the shot + :ivar str player_url: URL to play the shot in a player + """ + + def __init__( + self, + _connection, + rtstream_id: str, + start: float, + end: float, + rtstream_name: Optional[str] = None, + text: Optional[str] = None, + search_score: Optional[float] = None, + scene_index_id: Optional[str] = None, + 
scene_index_name: Optional[str] = None, + metadata: Optional[dict] = None, + ) -> None: + self._connection = _connection + self.rtstream_id = rtstream_id + self.rtstream_name = rtstream_name + self.start = start + self.end = end + self.text = text + self.search_score = search_score + self.scene_index_id = scene_index_id + self.scene_index_name = scene_index_name + self.metadata = metadata + self.stream_url = None + self.player_url = None + + def __repr__(self) -> str: + repr_str = ( + f"RTStreamShot(" + f"rtstream_id={self.rtstream_id}, " + f"rtstream_name={self.rtstream_name}, " + f"start={self.start}, " + f"end={self.end}, " + f"text={self.text}, " + f"search_score={self.search_score}" + ) + if self.scene_index_id: + repr_str += f", scene_index_id={self.scene_index_id}" + if self.scene_index_name: + repr_str += f", scene_index_name={self.scene_index_name}" + if self.metadata: + repr_str += f", metadata={self.metadata}" + repr_str += ")" + return repr_str + + def generate_stream(self) -> str: + """Generate a stream url for the shot. + + :return: The stream url + :rtype: str + """ + if self.stream_url: + return self.stream_url + + stream_data = self._connection.get( + f"{ApiPath.rtstream}/{self.rtstream_id}/{ApiPath.stream}", + params={"start": int(self.start), "end": int(self.end)}, + ) + self.stream_url = stream_data.get("stream_url") + self.player_url = stream_data.get("player_url") + return self.stream_url + + def play(self) -> str: + """Generate a stream url for the shot and open it in the default browser. + + :return: The stream url + :rtype: str + """ + self.generate_stream() + return play_stream(self.stream_url) class RTStreamSceneIndex: @@ -90,20 +246,24 @@ def stop(self): ) self.status = "stopped" - def create_alert(self, event_id, callback_url) -> str: + def create_alert(self, event_id, callback_url, ws_connection_id=None) -> str: """Create an event alert. 
:param str event_id: ID of the event :param str callback_url: URL to receive the alert callback + :param str ws_connection_id: WebSocket connection ID for real-time alerts :return: Alert ID :rtype: str """ + data = { + "event_id": event_id, + "callback_url": callback_url, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id alert_data = self._connection.post( f"{ApiPath.rtstream}/{self.rtstream_id}/{ApiPath.index}/{self.rtstream_index_id}/{ApiPath.alert}", - data={ - "event_id": event_id, - "callback_url": callback_url, - }, + data=data, ) return alert_data.get("alert_id", None) @@ -162,6 +322,7 @@ def __init__(self, _connection, id: str, **kwargs) -> None: self.created_at = kwargs.get("created_at", None) self.sample_rate = kwargs.get("sample_rate", None) self.status = kwargs.get("status", None) + self.channel_id = kwargs.get("channel_id", None) def __repr__(self) -> str: return ( @@ -198,6 +359,68 @@ def stop(self): ) self.status = "stopped" + def export(self, name: Optional[str] = None) -> "RTStreamExportResult": + """Export the latest completed recording as a video or audio asset. + + The stream must be stopped before exporting. The call is idempotent: + calling it again returns the same asset without re-ingesting. 
+ + :param str name: Name for the exported asset (optional, defaults to "{stream_name} - Recording") + :return: Export result with the asset ID and metadata + :rtype: :class:`RTStreamExportResult` + """ + data = {} + if name is not None: + data["name"] = name + + export_data = self._connection.post( + path=f"{ApiPath.rtstream}/{self.id}/{ApiPath.export}", + data=data, + ) + return RTStreamExportResult( + video_id=export_data.get("video_id"), + stream_url=export_data.get("stream_url"), + player_url=export_data.get("player_url"), + name=export_data.get("name"), + duration=export_data.get("duration"), + ) + + def start_transcript( + self, ws_connection_id: Optional[str] = None, engine: Optional[str] = None + ) -> dict: + """Start transcription for the rtstream. + + :param str ws_connection_id: WebSocket connection ID for real-time transcript updates (optional) + :param str engine: Transcription engine (optional, server defaults to "assemblyai") + :return: Transcription status with start time + :rtype: dict + """ + data = {"action": "start"} + if engine: + data["engine"] = engine + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + return self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + data=data, + ) + + def stop_transcript(self, engine: Optional[str] = None) -> dict: + """Stop transcription for the rtstream. + + :param str engine: Transcription engine (optional, server defaults to "assemblyai") + :return: Transcription status with start and end time + :rtype: dict + """ + data = {"action": "stop"} + if engine: + data["engine"] = engine + return self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + data=data, + ) + def generate_stream(self, start, end): """Generate a stream from the rtstream. @@ -220,6 +443,7 @@ def index_scenes( model_name=None, model_config={}, name=None, + ws_connection_id: Optional[str] = None, ): """Index scenes from the rtstream. 
@@ -229,19 +453,197 @@ def index_scenes( :param str model_name: Name of the model :param dict model_config: Configuration for the model :param str name: Name of the scene index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) :return: Scene index, :class:`RTStreamSceneIndex ` object :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` """ + data = { + "extraction_type": extraction_type, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + index_data = self._connection.post( f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", - data={ - "extraction_type": extraction_type, - "extraction_config": extraction_config, - "prompt": prompt, - "model_name": model_name, - "model_config": model_config, - "name": name, - }, + data=data, + ) + if not index_data: + return None + return RTStreamSceneIndex( + _connection=self._connection, + rtstream_index_id=index_data.get("rtstream_index_id"), + rtstream_id=self.id, + extraction_type=index_data.get("extraction_type"), + extraction_config=index_data.get("extraction_config"), + prompt=index_data.get("prompt"), + name=index_data.get("name"), + status=index_data.get("status"), + ) + + def index_spoken_words( + self, + prompt: str = None, + segmenter: str = Segmenter.word, + length: int = 10, + model_name: str = None, + model_config: dict = {}, + name: str = None, + ws_connection_id: Optional[str] = None, + ): + """Index spoken words from the rtstream transcript. 
+ + :param str prompt: Prompt for summarizing transcript segments + :param Segmenter segmenter: Segmentation type (:class:`Segmenter.word`, + :class:`Segmenter.sentence`, :class:`Segmenter.time`) + :param int length: Length of segments (words, sentences, or seconds based on segmenter) + :param str model_name: Name of the model + :param dict model_config: Configuration for the model + :param str name: Name of the spoken words index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) + :return: Scene index, :class:`RTStreamSceneIndex ` object + :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` + """ + extraction_config = { + "segmenter": segmenter, + "segmentation_value": length, + } + + data = { + "extraction_type": SceneExtractionType.transcript, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + index_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", + data=data, + ) + if not index_data: + return None + return RTStreamSceneIndex( + _connection=self._connection, + rtstream_index_id=index_data.get("rtstream_index_id"), + rtstream_id=self.id, + extraction_type=index_data.get("extraction_type"), + extraction_config=index_data.get("extraction_config"), + prompt=index_data.get("prompt"), + name=index_data.get("name"), + status=index_data.get("status"), + ) + + def index_audio( + self, + prompt: str = None, + batch_config: dict = None, + model_name: str = None, + model_config: dict = {}, + name: str = None, + ws_connection_id: Optional[str] = None, + ): + """Index audio from the rtstream transcript. 
+ + :param str prompt: Prompt for summarizing transcript segments + :param dict batch_config: Segmentation config with keys: + - "type": Segmentation type ("word", "sentence", or "time") + - "value": Segment length (words, sentences, or seconds) + :param str model_name: Name of the model + :param dict model_config: Configuration for the model + :param str name: Name of the audio index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) + :return: Scene index, :class:`RTStreamSceneIndex ` object + :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` + """ + if batch_config is not None: + extraction_config = { + "segmenter": batch_config.get("type"), + "segmentation_value": batch_config.get("value"), + } + else: + extraction_config = None + + data = { + "extraction_type": SceneExtractionType.transcript, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + index_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", + data=data, + ) + if not index_data: + return None + return RTStreamSceneIndex( + _connection=self._connection, + rtstream_index_id=index_data.get("rtstream_index_id"), + rtstream_id=self.id, + extraction_type=index_data.get("extraction_type"), + extraction_config=index_data.get("extraction_config"), + prompt=index_data.get("prompt"), + name=index_data.get("name"), + status=index_data.get("status"), + ) + + def index_visuals( + self, + prompt: str = None, + batch_config: dict = None, + model_name: str = None, + model_config: dict = {}, + name: str = None, + ws_connection_id: Optional[str] = None, + ): + """Index visuals (scenes) from the rtstream. 
+ + :param str prompt: Prompt for scene description + :param dict batch_config: Frame extraction config with keys: + - "type": Only "time" is supported + - "value": Window size in seconds + - "frame_count": Number of frames to extract per window + :param str model_name: Name of the model + :param dict model_config: Configuration for the model + :param str name: Name of the visual index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) + :return: Scene index, :class:`RTStreamSceneIndex ` object + :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` + """ + if batch_config is not None: + extraction_config = { + "time": batch_config.get("value"), + "frame_count": batch_config.get("frame_count"), + } + else: + extraction_config = None + + data = { + "extraction_type": SceneExtractionType.time_based, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + index_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", + data=data, ) if not index_data: return None @@ -299,3 +701,100 @@ def get_scene_index(self, index_id: str) -> RTStreamSceneIndex: name=index_data.get("name"), status=index_data.get("status"), ) + + def get_transcript( + self, + page=1, + page_size=100, + start=None, + end=None, + since=None, + engine=None, + ): + """Get transcription data from the rtstream. 
+ + :param int page: Page number (default: 1) + :param int page_size: Items per page (default: 100, max: 1000) + :param float start: Start timestamp filter (optional) + :param float end: End timestamp filter (optional) + :param float since: For polling - only get transcriptions after this timestamp (optional) + :param str engine: Transcription engine (default: "AAIS") + :return: Transcription data with segments and metadata + :rtype: dict + """ + params = { + "engine": engine, + "page": page, + "page_size": page_size, + } + if start is not None: + params["start"] = start + if end is not None: + params["end"] = end + if since is not None: + params["since"] = since + + transcription_data = self._connection.get( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + params=params, + ) + return transcription_data + + def search( + self, + query: str, + index_id: Optional[str] = None, + result_threshold: Optional[int] = None, + score_threshold: Optional[float] = None, + dynamic_score_percentage: Optional[float] = None, + filter: Optional[List[Dict[str, Any]]] = None, + ) -> RTStreamSearchResult: + """Search across scene index records for the rtstream. 
+ + :param str query: Query to search for + :param str index_id: Filter by specific scene index (optional) + :param int result_threshold: Number of results to return (optional) + :param float score_threshold: Minimum score threshold (optional) + :param float dynamic_score_percentage: Percentage of dynamic score to consider (optional) + :param list filter: Additional metadata filters (optional) + :return: :class:`RTStreamSearchResult ` object + :rtype: :class:`videodb.rtstream.RTStreamSearchResult` + """ + data = {"query": query} + + if index_id is not None: + data["scene_index_id"] = index_id + if result_threshold is not None: + data["result_threshold"] = result_threshold + if score_threshold is not None: + data["score_threshold"] = score_threshold + if dynamic_score_percentage is not None: + data["dynamic_score_percentage"] = dynamic_score_percentage + if filter is not None: + data["filter"] = filter + + search_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.search}", + data=data, + ) + + results = search_data.get("results", []) + shots = [ + RTStreamShot( + _connection=self._connection, + rtstream_id=self.id, + rtstream_name=self.name, + start=result.get("start"), + end=result.get("end"), + text=result.get("text"), + search_score=result.get("score"), + scene_index_id=result.get("scene_index_id"), + scene_index_name=result.get("scene_index_name"), + metadata=result.get("metadata"), + ) + for result in results + ] + return RTStreamSearchResult( + collection_id=self.collection_id, + shots=shots, + ) diff --git a/videodb/search.py b/videodb/search.py index ba557be..94730ec 100644 --- a/videodb/search.py +++ b/videodb/search.py @@ -45,6 +45,9 @@ def _format_results(self): doc.get("end"), doc.get("text"), doc.get("score"), + scene_index_id=doc.get("scene_index_id"), + scene_index_name=doc.get("scene_index_name"), + metadata=doc.get("metadata"), ) ) @@ -132,9 +135,11 @@ def search_inside_video( "index_type": index_type, "query": query, 
"score_threshold": score_threshold - or SemanticSearchDefaultValues.score_threshold, + if score_threshold is not None + else SemanticSearchDefaultValues.score_threshold, "result_threshold": result_threshold - or SemanticSearchDefaultValues.result_threshold, + if result_threshold is not None + else SemanticSearchDefaultValues.result_threshold, "dynamic_score_percentage": dynamic_score_percentage, **kwargs, }, @@ -159,9 +164,11 @@ def search_inside_collection( "index_type": index_type, "query": query, "score_threshold": score_threshold - or SemanticSearchDefaultValues.score_threshold, + if score_threshold is not None + else SemanticSearchDefaultValues.score_threshold, "result_threshold": result_threshold - or SemanticSearchDefaultValues.result_threshold, + if result_threshold is not None + else SemanticSearchDefaultValues.result_threshold, "dynamic_score_percentage": dynamic_score_percentage, **kwargs, }, diff --git a/videodb/shot.py b/videodb/shot.py index c2fadcb..b261077 100644 --- a/videodb/shot.py +++ b/videodb/shot.py @@ -1,5 +1,3 @@ - - from typing import Optional from videodb._utils._video import play_stream from videodb._constants import ( @@ -19,6 +17,9 @@ class Shot: :ivar int search_score: Search relevance score :ivar str stream_url: URL to stream the shot :ivar str player_url: URL to play the shot in a player + :ivar Optional[str] scene_index_id: ID of the scene index for scene search results + :ivar Optional[str] scene_index_name: Name of the scene index for scene search results + :ivar Optional[dict] metadata: Additional metadata for the shot """ def __init__( @@ -31,6 +32,9 @@ def __init__( end: float, text: Optional[str] = None, search_score: Optional[int] = None, + scene_index_id: Optional[str] = None, + scene_index_name: Optional[str] = None, + metadata: Optional[dict] = None, ) -> None: self._connection = _connection self.video_id = video_id @@ -40,21 +44,33 @@ def __init__( self.end = end self.text = text self.search_score = search_score + 
self.scene_index_id = scene_index_id + self.scene_index_name = scene_index_name + self.metadata = metadata self.stream_url = None self.player_url = None def __repr__(self) -> str: - return ( + repr_str = ( f"Shot(" f"video_id={self.video_id}, " f"video_title={self.video_title}, " f"start={self.start}, " f"end={self.end}, " f"text={self.text}, " - f"search_score={self.search_score}, " - f"stream_url={self.stream_url}, " - f"player_url={self.player_url})" + f"search_score={self.search_score}" ) + if self.scene_index_id: + repr_str += f", scene_index_id={self.scene_index_id}" + + if self.scene_index_name: + repr_str += f", scene_index_name={self.scene_index_name}" + + if self.metadata: + repr_str += f", metadata={self.metadata}" + + repr_str += f", stream_url={self.stream_url}, player_url={self.player_url})" + return repr_str def __getitem__(self, key): """Get an item from the shot object""" diff --git a/videodb/video.py b/videodb/video.py index 34af19d..de4ea43 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -7,6 +7,7 @@ SceneExtractionType, SearchType, Segmenter, + SegmentationType, SubtitleStyle, Workflows, ) @@ -297,12 +298,14 @@ def translate_transcript( def index_spoken_words( self, language_code: Optional[str] = None, + segmentation_type: Optional[SegmentationType] = SegmentationType.sentence, force: bool = False, callback_url: str = None, ) -> None: """Semantic indexing of spoken words in the video. 
def index_visuals(
    self,
    prompt: Optional[str] = None,
    batch_config: Optional[Dict] = None,
    model_name: Optional[str] = None,
    model_config: Optional[Dict] = None,
    name: Optional[str] = None,
    callback_url: Optional[str] = None,
) -> Optional[str]:
    """Index visuals (scenes) from the video.

    :param str prompt: (optional) Prompt for scene description
    :param dict batch_config: (optional) Frame extraction config with keys:
        - "type": Extraction type. ``"shot"`` selects shot-based extraction;
          any other value (including a missing key) selects time-based
          extraction. Documented default is "time".
        - "value": Window size in seconds (for time) or threshold (for shot).
          Documented default is 10.
        - "frame_count": Number of frames to extract per window.
          Documented default is 1.
        - "select_frames": Which frames to select
          (e.g., ["first", "middle", "last"]). Only forwarded for time-based
          extraction. Documented default is ["first"].
        Keys that are omitted are forwarded as ``None``; this method does not
        fill in the documented defaults itself (presumably the API does --
        confirm against the API reference).
    :param str model_name: (optional) Name of the model
    :param dict model_config: (optional) Configuration for the model; sent as
        an empty dict when omitted
    :param str name: (optional) Name of the visual index
    :param str callback_url: (optional) URL to receive the callback
    :return: The scene index id, or ``None`` if the API returned no data
    :rtype: Optional[str]
    """
    extraction_type = None
    extraction_config = None
    if batch_config is not None:
        if batch_config.get("type") == "shot":
            extraction_type = SceneExtractionType.shot_based
            extraction_config = {
                "threshold": batch_config.get("value"),
                "frame_count": batch_config.get("frame_count"),
            }
        else:
            # Anything that is not "shot" falls back to time-based windows.
            extraction_type = SceneExtractionType.time_based
            extraction_config = {
                "time": batch_config.get("value"),
                "frame_count": batch_config.get("frame_count"),
                "select_frames": batch_config.get("select_frames"),
            }

    return self._submit_scene_index(
        {
            "extraction_type": extraction_type,
            "extraction_config": extraction_config,
            "prompt": prompt,
            "model_name": model_name,
            "model_config": model_config or {},
            "name": name,
            "callback_url": callback_url,
        }
    )


def index_audio(
    self,
    prompt: Optional[str] = None,
    model_name: Optional[str] = None,
    model_config: Optional[Dict] = None,
    language_code: Optional[str] = None,
    batch_config: Optional[Dict] = None,
    name: Optional[str] = None,
    callback_url: Optional[str] = None,
) -> Optional[str]:
    """Index audio by processing transcript segments through an LLM.

    Requests a transcript-based scene index: the video transcript is
    segmented, each segment is processed with the given prompt using the
    specified model, and the results are indexed as scene records for
    semantic search.

    :param str prompt: (optional) Prompt for processing transcript segments
    :param str model_name: (optional) LLM tier to use (e.g. "basic", "pro", "ultra")
    :param dict model_config: (optional) Model configuration; forwarded as-is
        (``None`` when omitted)
    :param str language_code: (optional) Language code for transcription
    :param dict batch_config: (optional) Segmentation config with keys:
        - "type": Segmentation type ("word", "sentence", or "time")
        - "value": Segment length (words, sentences, or seconds)
        Documented default is {"type": "word", "value": 10}; when omitted this
        method sends no segmentation config at all, so the default is
        presumably applied by the API -- confirm against the API reference.
    :param str name: (optional) Name for the scene index
    :param str callback_url: (optional) URL to receive the callback
    :return: The scene index id, or ``None`` if the API returned no data
    :rtype: Optional[str]
    """
    extraction_config = None
    if batch_config is not None:
        # The API names these fields differently from the public batch_config.
        extraction_config = {
            "segmenter": batch_config.get("type"),
            "segmentation_value": batch_config.get("value"),
        }

    return self._submit_scene_index(
        {
            "extraction_type": SceneExtractionType.transcript,
            "extraction_config": extraction_config,
            "prompt": prompt,
            "model_name": model_name,
            "model_config": model_config,
            "language_code": language_code,
            "name": name,
            "callback_url": callback_url,
        }
    )


def _submit_scene_index(self, payload: Dict) -> Optional[str]:
    """POST a scene-index request for this video and return the new index id.

    Shared by :meth:`index_visuals` and :meth:`index_audio`, which differ only
    in the payload they build.

    :param dict payload: Request body for the scene index endpoint
    :return: The ``scene_index_id`` from the response, or ``None`` when the
        response is empty
    :rtype: Optional[str]
    """
    scenes_data = self._connection.post(
        path=f"{ApiPath.video}/{self.id}/{ApiPath.index}/{ApiPath.scene}",
        data=payload,
    )
    if not scenes_data:
        return None
    return scenes_data.get("scene_index_id")
class WebSocketConnection:
    """Persistent WebSocket connection for receiving events.

    Use it directly (``await ws.connect()`` ... ``await ws.close()``) or as an
    async context manager, which connects on entry and closes on exit.
    """

    def __init__(self, url: str) -> None:
        """Store the target URL; no network I/O happens here.

        :param str url: WebSocket URL to connect to
        :raises ImportError: If the optional ``websockets`` package is not installed
        """
        if websockets is None:
            raise ImportError(
                "The 'websockets' library is required for WebSocket support. "
                "Please install it using 'pip install videodb[websockets]' or 'pip install websockets'."
            )
        self.url = url
        self._connection = None
        self.connection_id = None

    async def connect(self) -> "WebSocketConnection":
        """Establish the WebSocket connection and read the init message.

        The first message received is expected to be a JSON object carrying
        ``connection_id``. If it cannot be read or parsed, the socket is
        closed and the original exception propagates.

        :return: This connection, ready for :meth:`receive` / :meth:`send`
        :rtype: WebSocketConnection
        """
        # Reconnecting must not orphan a socket that is still open.
        if self._connection is not None:
            await self.close()
        # Never expose the previous session's ID while (re)connecting.
        self.connection_id = None

        logger.debug("Connecting to WebSocket URL: %s", self.url)
        self._connection = await websockets.connect(self.url)

        try:
            init_msg = await self._connection.recv()
            data = json.loads(init_msg)
            self.connection_id = data.get("connection_id")
        except Exception as e:
            logger.error("Failed to receive initialization message: %s", e)
            await self.close()
            raise  # bare raise keeps the original traceback intact

        logger.info("WebSocket connected with ID: %s", self.connection_id)
        return self

    async def close(self) -> None:
        """Close the WebSocket connection; a no-op when already closed."""
        # Detach first so the instance is marked disconnected even if the
        # underlying close() raises.
        connection, self._connection = self._connection, None
        if connection is not None:
            await connection.close()

    async def receive(self) -> AsyncGenerator[dict, None]:
        """Async generator that yields received messages.

        JSON messages are yielded decoded; anything that cannot be decoded is
        yielded as ``{"raw": message}``.

        :raises ConnectionError: On first iteration, if not connected
        """
        if not self._connection:
            raise ConnectionError("WebSocket is not connected. Call connect() first.")

        async for message in self._connection:
            # Parse outside the yield so the except clause only guards decoding.
            try:
                payload = json.loads(message)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # UnicodeDecodeError covers binary frames that are not valid text.
                logger.warning("Received non-JSON message: %s", message)
                payload = {"raw": message}
            yield payload

    async def send(self, message: dict) -> None:
        """Send a message over the WebSocket, serialized as JSON.

        :param dict message: JSON-serializable payload
        :raises ConnectionError: If not connected
        """
        if not self._connection:
            raise ConnectionError("WebSocket is not connected.")
        await self._connection.send(json.dumps(message))

    async def __aenter__(self) -> "WebSocketConnection":
        return await self.connect()

    async def __aexit__(self, exc_type, exc_value, traceback) -> None:
        await self.close()