From a1a1c49b98795bb8d67410a5a37ec4980794b8b5 Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Fri, 17 Oct 2025 11:26:23 +0100 Subject: [PATCH 1/8] Add caption task type --- Runware/types.ts | 15 +++++++++++++++ readme.md | 27 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/Runware/types.ts b/Runware/types.ts index 61ca810..d6de9c1 100644 --- a/Runware/types.ts +++ b/Runware/types.ts @@ -14,6 +14,7 @@ export enum ETaskType { UPSCALE = "upscale", REMOVE_BACKGROUND = "removeBackground", VIDEO_INFERENCE = "videoInference", + CAPTION = "caption", GET_RESPONSE = "getResponse", PHOTO_MAKER = "photoMaker", IMAGE_CAPTION = "imageCaption", @@ -333,6 +334,20 @@ export interface IRequestVideo extends IRequestImageToText { [key: string]: any; } + +export interface IRequestCaption { + model: string; + inputs?: { + video?: InputsValue; + } & { + [key: string]: unknown; + }; + includeCost?: boolean; + customTaskUUID?: string; + + skipResponse?: boolean; + [key: string]: any; +} export interface IAsyncResults { taskUUID: string; onPartialImages?: (images: IImage[], error?: IError) => void; diff --git a/readme.md b/readme.md index bea180d..dd6a332 100644 --- a/readme.md +++ b/readme.md @@ -431,6 +431,33 @@ return interface IControlNetImage {   +### Request Caption + +[Read Documentation](https://docs.runware.ai/en/utilities/caption) + +```js + +const runware = new Runware({ apiKey: "API_KEY" }); +const caption = await runware.caption({ + "model": "memories:1@1", + inputs: { + video: "https://example.com/video.mp4" + } +}); + +console.log(caption) + +return interface ICaption { + taskUUID: string; + taskType: string; + status?: string; + text?: string; + cost?: number; +} +``` + +  + ### Model Upload [Read Documentation](https://docs.runware.ai/en/image-inference/model-upload) From 6010c75639b9d5533c8d5310cea48d636034b868 Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Tue, 4 Nov 2025 13:50:12 +0000 Subject: [PATCH 2/8] Add caption task type --- Runware/Runware-base.ts | 131 ++++++++++++++++++---------------------- Runware/types.ts | 10 ++- readme.md | 4 ++ 3 files changed, 72 insertions(+), 73 deletions(-) diff --git a/Runware/Runware-base.ts b/Runware/Runware-base.ts index 6395f95..ae5be54 100644 --- a/Runware/Runware-base.ts +++ b/Runware/Runware-base.ts @@ -95,11 +95,16 @@ export class RunwareBase { this._timeoutDuration = timeoutDuration; } - private getResultUUID(result: any): string | undefined { - // Find the UUID for a given input type - // mediaUUID = generic input - // Others = specific input types - for (const key of ["mediaUUID", "imageUUID", "videoUUID"]) { + /** + * Returns the first string value found in the result object for the given keys. + * Used to extract a value (such as a UUID, text, or other result parameter) from a result object. + * The keys provided in resultKeys may refer to UUIDs (e.g., mediaUUID, imageUUID, videoUUID) or other string fields (e.g., text for captioning). + * @param result - The result object to search for a string value. + * @param resultKeys - The list of keys to check in order of priority. + * @returns The first string value found for the specified keys, or undefined if none found. + */ + private getResultValue(result: any, resultKeys = ["mediaUUID", "imageUUID", "videoUUID"]): string | undefined { + for (const key of resultKeys) { if (typeof result[key] === "string") return result[key]; } return undefined; @@ -111,11 +116,13 @@ export class RunwareBase { * @param numberResults - Number of results expected. * @returns Promise resolving to array of results. */ - private async pollForAsyncResults({ + private async pollForAsyncResults({ taskUUID, + resultKeys, numberResults = 1, }: { taskUUID: string; + resultKeys?: string[]; numberResults?: number; }): Promise { const allResults = new Map(); @@ -126,9 +133,9 @@ export class RunwareBase { // Add results to the collection for (const result of results || []) { - const resultUUID = this.getResultUUID(result); - if (resultUUID) { - allResults.set(resultUUID, result); + const resultValue = this.getResultValue(result, resultKeys); + if (resultValue) { + allResults.set(resultValue, result); } } @@ -799,83 +806,61 @@ export class RunwareBase { requestImageToText = async ({ inputImage, + inputs, includeCost, customTaskUUID, taskUUID: _taskUUID, retry, includePayload, includeGenerationTime, + deliveryMethod, + skipResponse, + model, }: IRequestImageToText): Promise => { - const totalRetry = retry || this._globalMaxRetries; - let lis: any = undefined; - - const startTime = Date.now(); - try { - return await asyncRetry( - async () => { - await this.ensureConnection(); - const imageUploaded = inputImage - ? await this.uploadImage(inputImage as File | string) - : null; - - const taskUUID = _taskUUID || customTaskUUID || getUUID(); - - const payload = { - taskUUID, - taskType: ETaskType.IMAGE_CAPTION, - inputImage: imageUploaded?.imageUUID, - ...evaluateNonTrue({ key: "includeCost", value: includeCost }), - }; - - this.send(payload); - - lis = this.globalListener({ - taskUUID, - }); - - const response = await getIntervalWithPromise( - ({ resolve, reject }) => { - const newReverseClip = this.getSingleMessage({ - taskUUID, - }); + let imageUploaded; - if (!newReverseClip) return; + // TODO: Add support for handling all media uploads from inputs object + // This is legacy support for inputImage only + if (inputImage) { + imageUploaded = await this.uploadImage(inputImage as File | string); + } - if (newReverseClip?.error) { - reject(newReverseClip); - return true; - } + const taskUUID = _taskUUID || customTaskUUID || getUUID(); + const payload = { + taskUUID, + taskType: ETaskType.CAPTION, + model, + inputImage: imageUploaded?.imageUUID, + inputs, + ...evaluateNonTrue({ key: "includeCost", value: includeCost }), + retry, + includePayload, + includeGenerationTime, + }; - if (newReverseClip) { - delete this._globalMessages[taskUUID]; - resolve(newReverseClip); - return true; - } - }, - { - debugKey: "remove-image-background", - timeoutDuration: this._timeoutDuration, - } - ); + const request = await this.baseSingleRequest({ + payload: { + ...payload, + taskType: ETaskType.CAPTION, + }, + debugKey: "caption", + }); - lis.destroy(); + if (skipResponse) { + return request; + } - this.insertAdditionalResponse({ - response: response, - payload: includePayload ? payload : undefined, - startTime: includeGenerationTime ? startTime : undefined, - }); + if (deliveryMethod === "async") { + const taskUUID = request?.taskUUID; + const results = await this.pollForAsyncResults({ + taskUUID, + resultKeys: ["text"], + }); + return results[0]; + } - return response as IImageToText; - }, - { - maxRetries: totalRetry, - callback: () => { - lis?.destroy(); - }, - } - ); + return request; } catch (e) { throw e; } @@ -1438,6 +1423,8 @@ export class RunwareBase { } ); + console.log("payload", payload); + this.insertAdditionalResponse({ response: response, payload: includePayload ? payload : undefined, diff --git a/Runware/types.ts b/Runware/types.ts index d6de9c1..dd673b4 100644 --- a/Runware/types.ts +++ b/Runware/types.ts @@ -17,7 +17,6 @@ export enum ETaskType { CAPTION = "caption", GET_RESPONSE = "getResponse", PHOTO_MAKER = "photoMaker", - IMAGE_CAPTION = "imageCaption", IMAGE_CONTROL_NET_PRE_PROCESS = "imageControlNetPreProcess", IMAGE_MASKING = "imageMasking", PROMPT_ENHANCE = "promptEnhance", @@ -252,11 +251,20 @@ export interface IRefiner { startStepPercentage?: number; } export interface IRequestImageToText extends IAdditionalResponsePayload { + model?: string; inputImage?: File | string; + inputs?: { + video?: InputsValue; + } & { + [key: string]: unknown; + }; includeCost?: boolean; customTaskUUID?: string; taskUUID?: string; retry?: number; + + deliveryMethod?: string; + skipResponse?: boolean; } export interface IImageToText { taskType: ETaskType; diff --git a/readme.md b/readme.md index dd6a332..a9b8f7d 100644 --- a/readme.md +++ b/readme.md @@ -706,6 +706,10 @@ export type TImageMaskingResponse = { ## Changelog +### - v1.2.1 + +- Added caption task type + ### - v1.2.0 - Change removeImageBackground taskType from `removeImageBackground` to `removeBackground` -- removeBackground is compatible with removeImageBackground but it also supports other media inputs such as removing backgrounds from videos From a3d9431f0b5963e059d08b182f5c96f4cd8a3fe7 Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Tue, 4 Nov 2025 14:08:45 +0000 Subject: [PATCH 3/8] Cleanup --- Runware/Runware-base.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Runware/Runware-base.ts b/Runware/Runware-base.ts index ae5be54..19b7044 100644 --- a/Runware/Runware-base.ts +++ b/Runware/Runware-base.ts @@ -116,7 +116,7 @@ export class RunwareBase { * @param numberResults - Number of results expected. * @returns Promise resolving to array of results. */ - private async pollForAsyncResults({ + private async pollForAsyncResults({ taskUUID, resultKeys, numberResults = 1, @@ -1423,8 +1423,6 @@ export class RunwareBase { } ); - console.log("payload", payload); - this.insertAdditionalResponse({ response: response, payload: includePayload ? payload : undefined, From bcf769d2a80a2a2d6f4b9247ce49a4016b282d2f Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Tue, 4 Nov 2025 14:10:27 +0000 Subject: [PATCH 4/8] Update readme --- readme.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/readme.md b/readme.md index a9b8f7d..d682776 100644 --- a/readme.md +++ b/readme.md @@ -447,11 +447,10 @@ const caption = await runware.caption({ console.log(caption) -return interface ICaption { +return interface IImageToText { + taskType: ETaskType; taskUUID: string; - taskType: string; - status?: string; - text?: string; + text: string; cost?: number; } ``` From 1929600d2cfb12949bf50b45e1f6fd1da568a27f Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Tue, 4 Nov 2025 14:11:48 +0000 Subject: [PATCH 5/8] Remove unused interface --- Runware/types.ts | 13 ------------- package.json | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/Runware/types.ts b/Runware/types.ts index dd673b4..c809dfb 100644 --- a/Runware/types.ts +++ b/Runware/types.ts @@ -343,19 +343,6 @@ export interface IRequestVideo extends IRequestImageToText { [key: string]: any; } -export interface IRequestCaption { - model: string; - inputs?: { - video?: InputsValue; - } & { - [key: string]: unknown; - }; - includeCost?: boolean; - customTaskUUID?: string; - - skipResponse?: boolean; - [key: string]: any; -} export interface IAsyncResults { taskUUID: string; onPartialImages?: (images: IImage[], error?: IError) => void; diff --git a/package.json b/package.json index 9dfa0f9..63e892b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@runware/sdk-js", - "version": "1.2.0", + "version": "1.2.1-beta.1", "description": "The SDK is used to run image inference with the Runware API, powered by the RunWare inference platform. It can be used to generate imaged with text-to-image and image-to-image. It also allows the use of an existing gallery of models or selecting any model or LoRA from the CivitAI gallery. The API also supports upscaling, background removal, inpainting and outpainting, and a series of other ControlNet models.", "main": "dist/index.js", "module": "dist/index.js", From 5bad29116ba7012d95148932f1f960c598357f40 Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Mon, 10 Nov 2025 10:00:17 +0000 Subject: [PATCH 6/8] Update version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 63e892b..ac6b3bd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@runware/sdk-js", - "version": "1.2.1-beta.1", + "version": "1.2.1", "description": "The SDK is used to run image inference with the Runware API, powered by the RunWare inference platform. It can be used to generate imaged with text-to-image and image-to-image. It also allows the use of an existing gallery of models or selecting any model or LoRA from the CivitAI gallery. The API also supports upscaling, background removal, inpainting and outpainting, and a series of other ControlNet models.", "main": "dist/index.js", "module": "dist/index.js", From 79f9efdeff748882251a0c9380026fd47909c1ff Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Tue, 11 Nov 2025 11:29:47 +0000 Subject: [PATCH 7/8] Use status to determine success response --- Runware/Runware-base.ts | 29 +++++------------------------ Runware/types.ts | 4 ++++ 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/Runware/Runware-base.ts b/Runware/Runware-base.ts index 19b7044..988e881 100644 --- a/Runware/Runware-base.ts +++ b/Runware/Runware-base.ts @@ -95,47 +95,29 @@ export class RunwareBase { this._timeoutDuration = timeoutDuration; } - /** - * Returns the first string value found in the result object for the given keys. - * Used to extract a value (such as a UUID, text, or other result parameter) from a result object. - * The keys provided in resultKeys may refer to UUIDs (e.g., mediaUUID, imageUUID, videoUUID) or other string fields (e.g., text for captioning). - * @param result - The result object to search for a string value. - * @param resultKeys - The list of keys to check in order of priority. - * @returns The first string value found for the specified keys, or undefined if none found. - */ - private getResultValue(result: any, resultKeys = ["mediaUUID", "imageUUID", "videoUUID"]): string | undefined { - for (const key of resultKeys) { - if (typeof result[key] === "string") return result[key]; - } - return undefined; - } - /** * Shared polling logic for async results. * @param taskUUID - The task UUID to poll for. * @param numberResults - Number of results expected. * @returns Promise resolving to array of results. */ - private async pollForAsyncResults({ + private async pollForAsyncResults({ taskUUID, - resultKeys, numberResults = 1, }: { taskUUID: string; - resultKeys?: string[]; numberResults?: number; }): Promise { const allResults = new Map(); await getIntervalAsyncWithPromise( async ({ resolve, reject }) => { try { - const results = await this.getResponse({ taskUUID }); + const response = await this.getResponse({ taskUUID }); // Add results to the collection - for (const result of results || []) { - const resultValue = this.getResultValue(result, resultKeys); - if (resultValue) { - allResults.set(resultValue, result); + for (const responseItem of response || []) { + if (responseItem.status === "success") { + allResults.set(responseItem.taskUUID, responseItem); } } @@ -855,7 +837,6 @@ export class RunwareBase { const taskUUID = request?.taskUUID; const results = await this.pollForAsyncResults({ taskUUID, - resultKeys: ["text"], }); return results[0]; } diff --git a/Runware/types.ts b/Runware/types.ts index c809dfb..d56bbc5 100644 --- a/Runware/types.ts +++ b/Runware/types.ts @@ -49,6 +49,7 @@ export interface IImage { imageUUID?: string; inputImageUUID?: string; taskUUID: string; + status: string; imageURL?: string; imageBase64Data?: string; imageDataURI?: string; @@ -269,6 +270,7 @@ export interface IRequestImageToText extends IAdditionalResponsePayload { export interface IImageToText { taskType: ETaskType; taskUUID: string; + status: string; text: string; cost?: number; } @@ -351,6 +353,7 @@ export interface IAsyncResults { export interface IRemoveImage { taskType: ETaskType; taskUUID: string; + status: string; imageUUID?: string; mediaUUID?: string; mediaURL?: string; @@ -614,6 +617,7 @@ export type TPhotoMaker = { export type TPhotoMakerResponse = { taskType: string; taskUUID: string; + status: string; imageUUID: string; NSFWContent: boolean; cost: number; From 1f4ea0e83e0d205c02f78b5ce54b6f0430af4ca0 Mon Sep 17 00:00:00 2001 From: Ian Lunn Date: Tue, 11 Nov 2025 12:00:43 +0000 Subject: [PATCH 8/8] Update version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ac6b3bd..51a427c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@runware/sdk-js", - "version": "1.2.1", + "version": "1.2.1-beta.2", "description": "The SDK is used to run image inference with the Runware API, powered by the RunWare inference platform. It can be used to generate imaged with text-to-image and image-to-image. It also allows the use of an existing gallery of models or selecting any model or LoRA from the CivitAI gallery. The API also supports upscaling, background removal, inpainting and outpainting, and a series of other ControlNet models.", "main": "dist/index.js", "module": "dist/index.js",