Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 45 additions & 79 deletions Runware/Runware-base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,13 @@ export class RunwareBase {
this._timeoutDuration = timeoutDuration;
}

/**
 * Picks the identifier under which a polled result item is stored.
 *
 * Keys are probed in priority order: `mediaUUID` (the generic key) first,
 * then the media-specific `imageUUID` and `videoUUID`. The first key whose
 * value is a string wins.
 *
 * @param result - A single response item. Nullish values are tolerated.
 * @returns The first string UUID found, or `undefined` when the item is
 *   nullish or carries none of the known UUID keys as a string.
 */
private getResultUUID(result: any): string | undefined {
  // A null/undefined item would otherwise throw on property access below.
  if (result == null) return undefined;

  for (const key of ["mediaUUID", "imageUUID", "videoUUID"] as const) {
    const candidate: unknown = result[key];
    if (typeof candidate === "string") return candidate;
  }
  return undefined;
}

/**
* Shared polling logic for async results.
* @param taskUUID - The task UUID to poll for.
* @param numberResults - Number of results expected.
* @returns Promise resolving to array of results.
*/
private async pollForAsyncResults<T extends {mediaUUID?: string; imageUUID?: string; videoUUID?: string;}>({
private async pollForAsyncResults<T extends { status: string; taskUUID: string; }>({
taskUUID,
numberResults = 1,
}: {
Expand All @@ -122,13 +112,12 @@ export class RunwareBase {
await getIntervalAsyncWithPromise(
async ({ resolve, reject }) => {
try {
const results = await this.getResponse<T>({ taskUUID });
const response = await this.getResponse<T>({ taskUUID });

// Add results to the collection
for (const result of results || []) {
const resultUUID = this.getResultUUID(result);
if (resultUUID) {
allResults.set(resultUUID, result);
for (const responseItem of response || []) {
if (responseItem.status === "success") {
allResults.set(responseItem.taskUUID, responseItem);
}
}

Expand Down Expand Up @@ -799,83 +788,60 @@ export class RunwareBase {

requestImageToText = async ({
inputImage,
inputs,
includeCost,
customTaskUUID,
taskUUID: _taskUUID,
retry,
includePayload,
includeGenerationTime,
deliveryMethod,
skipResponse,
model,
}: IRequestImageToText): Promise<IImageToText> => {
const totalRetry = retry || this._globalMaxRetries;
let lis: any = undefined;

const startTime = Date.now();

try {
return await asyncRetry(
async () => {
await this.ensureConnection();
const imageUploaded = inputImage
? await this.uploadImage(inputImage as File | string)
: null;

const taskUUID = _taskUUID || customTaskUUID || getUUID();

const payload = {
taskUUID,
taskType: ETaskType.IMAGE_CAPTION,
inputImage: imageUploaded?.imageUUID,
...evaluateNonTrue({ key: "includeCost", value: includeCost }),
};

this.send(payload);

lis = this.globalListener({
taskUUID,
});

const response = await getIntervalWithPromise(
({ resolve, reject }) => {
const newReverseClip = this.getSingleMessage({
taskUUID,
});
let imageUploaded;

if (!newReverseClip) return;
// TODO: Add support for handling all media uploads from inputs object
// This is legacy support for inputImage only
if (inputImage) {
imageUploaded = await this.uploadImage(inputImage as File | string);
}

if (newReverseClip?.error) {
reject(newReverseClip);
return true;
}
const taskUUID = _taskUUID || customTaskUUID || getUUID();
const payload = {
taskUUID,
taskType: ETaskType.CAPTION,
model,
inputImage: imageUploaded?.imageUUID,
inputs,
...evaluateNonTrue({ key: "includeCost", value: includeCost }),
retry,
includePayload,
includeGenerationTime,
};

if (newReverseClip) {
delete this._globalMessages[taskUUID];
resolve(newReverseClip);
return true;
}
},
{
debugKey: "remove-image-background",
timeoutDuration: this._timeoutDuration,
}
);
const request = await this.baseSingleRequest<IImageToText>({
payload: {
...payload,
taskType: ETaskType.CAPTION,
},
debugKey: "caption",
});

lis.destroy();
if (skipResponse) {
return request;
}

this.insertAdditionalResponse({
response: response,
payload: includePayload ? payload : undefined,
startTime: includeGenerationTime ? startTime : undefined,
});
if (deliveryMethod === "async") {
const taskUUID = request?.taskUUID;
const results = await this.pollForAsyncResults<IImageToText>({
taskUUID,
});
return results[0];
}

return response as IImageToText;
},
{
maxRetries: totalRetry,
callback: () => {
lis?.destroy();
},
}
);
return request;
} catch (e) {
throw e;
}
Expand Down
16 changes: 15 additions & 1 deletion Runware/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ export enum ETaskType {
UPSCALE = "upscale",
REMOVE_BACKGROUND = "removeBackground",
VIDEO_INFERENCE = "videoInference",
CAPTION = "caption",
GET_RESPONSE = "getResponse",
PHOTO_MAKER = "photoMaker",
IMAGE_CAPTION = "imageCaption",
IMAGE_CONTROL_NET_PRE_PROCESS = "imageControlNetPreProcess",
IMAGE_MASKING = "imageMasking",
PROMPT_ENHANCE = "promptEnhance",
Expand Down Expand Up @@ -49,6 +49,7 @@ export interface IImage {
imageUUID?: string;
inputImageUUID?: string;
taskUUID: string;
status: string;
imageURL?: string;
imageBase64Data?: string;
imageDataURI?: string;
Expand Down Expand Up @@ -251,15 +252,25 @@ export interface IRefiner {
startStepPercentage?: number;
}
/**
 * Options accepted by `requestImageToText` (the caption request).
 *
 * NOTE(review): field notes below describe how `requestImageToText` uses each
 * option as far as this file shows; confirm wire-level semantics against the
 * linked Runware caption documentation.
 */
export interface IRequestImageToText extends IAdditionalResponsePayload {
// Model identifier forwarded in the request payload (readme example: "memories:1@1").
model?: string;
// Legacy single-image input: when provided it is uploaded via `uploadImage`
// and the resulting `imageUUID` is sent as `inputImage` in the payload.
inputImage?: File | string;
// Generic media inputs forwarded as-is in the payload. Only `video` is typed
// explicitly; any other key is accepted through the index signature.
inputs?: {
video?: InputsValue;
} & {
[key: string]: unknown;
};
// Passed through `evaluateNonTrue` before being added to the payload —
// presumably asks the API to report the task cost; TODO confirm.
includeCost?: boolean;
// Caller-supplied task identifier; used only when `taskUUID` is not set.
customTaskUUID?: string;
// Task identifier; takes precedence over `customTaskUUID`. When neither is
// given, a fresh UUID is generated.
taskUUID?: string;
// Retry count for the request — NOTE(review): confirm how it is consumed.
retry?: number;

// When equal to "async", the result is obtained by polling with the task UUID
// instead of being returned from the initial request.
deliveryMethod?: string;
// When truthy, the initial request result is returned immediately and no
// polling is performed.
skipResponse?: boolean;
}
/**
 * Result shape resolved by `requestImageToText`.
 */
export interface IImageToText {
// Task type of the response item.
taskType: ETaskType;
// Identifier of the task; async polling keys collected results by this value.
taskUUID: string;
// Task state; async polling only collects items whose status is "success".
status: string;
// Presumably the generated caption/description text — TODO confirm.
text: string;
// Optional cost of the task — presumably present only when `includeCost`
// was requested; verify against the API docs.
cost?: number;
}
Expand Down Expand Up @@ -333,6 +344,7 @@ export interface IRequestVideo extends IRequestImageToText {

[key: string]: any;
}

export interface IAsyncResults {
taskUUID: string;
onPartialImages?: (images: IImage[], error?: IError) => void;
Expand All @@ -341,6 +353,7 @@ export interface IAsyncResults {
export interface IRemoveImage {
taskType: ETaskType;
taskUUID: string;
status: string;
imageUUID?: string;
mediaUUID?: string;
mediaURL?: string;
Expand Down Expand Up @@ -604,6 +617,7 @@ export type TPhotoMaker = {
export type TPhotoMakerResponse = {
taskType: string;
taskUUID: string;
status: string;
imageUUID: string;
NSFWContent: boolean;
cost: number;
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@runware/sdk-js",
"version": "1.2.0",
"version": "1.2.1-beta.2",
"description": "The SDK is used to run image inference with the Runware API, powered by the RunWare inference platform. It can be used to generate images with text-to-image and image-to-image. It also allows the use of an existing gallery of models or selecting any model or LoRA from the CivitAI gallery. The API also supports upscaling, background removal, inpainting and outpainting, and a series of other ControlNet models.",
"main": "dist/index.js",
"module": "dist/index.js",
Expand Down
30 changes: 30 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,32 @@ return interface IControlNetImage {

&nbsp;

### Request Caption

[Read Documentation](https://docs.runware.ai/en/utilities/caption)

```js

const runware = new Runware({ apiKey: "API_KEY" });
const caption = await runware.caption({
"model": "memories:1@1",
inputs: {
video: "https://example.com/video.mp4"
}
});

console.log(caption)

return interface IImageToText {
taskType: ETaskType;
taskUUID: string;
status: string;
text: string;
cost?: number;
}
```

&nbsp;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not including a table explaining the params — the information is not available yet, and it is better to keep a single source of truth in the linked docs.

### Model Upload

[Read Documentation](https://docs.runware.ai/en/image-inference/model-upload)
Expand Down Expand Up @@ -679,6 +705,10 @@ export type TImageMaskingResponse = {

## Changelog

### - v1.2.1

- Added caption task type

### - v1.2.0

- Change removeImageBackground taskType from `removeImageBackground` to `removeBackground` -- removeBackground is compatible with removeImageBackground but it also supports other media inputs such as removing backgrounds from videos
Expand Down
Loading