From 2b3fc13febb41752e9ea04b607d45b66cd85f824 Mon Sep 17 00:00:00 2001 From: Nishit24113 Date: Wed, 29 Apr 2026 13:53:03 -0700 Subject: [PATCH] Fix Bedrock aspect ratio rejection for form PDFs --- .../alt_text_generator.js | 60 +++++++++++++++++-- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/alt-text-generator-container/alt_text_generator.js b/alt-text-generator-container/alt_text_generator.js index 05e39a98..eecd3a75 100644 --- a/alt-text-generator-container/alt_text_generator.js +++ b/alt-text-generator-container/alt_text_generator.js @@ -69,6 +69,42 @@ function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } +const MAX_ASPECT_RATIO = 20; + +function getImageDimensions(buffer) { + try { + if (buffer[0] === 0x89 && buffer[1] === 0x50 && buffer[2] === 0x4E && buffer[3] === 0x47) { + const width = buffer.readUInt32BE(16); + const height = buffer.readUInt32BE(20); + return { width, height }; + } + if (buffer[0] === 0xFF && buffer[1] === 0xD8) { + let offset = 2; + while (offset < buffer.length - 1) { + if (buffer[offset] !== 0xFF) break; + const marker = buffer[offset + 1]; + if (marker === 0xC0 || marker === 0xC2) { + const height = buffer.readUInt16BE(offset + 5); + const width = buffer.readUInt16BE(offset + 7); + return { width, height }; + } + const segmentLength = buffer.readUInt16BE(offset + 2); + offset += 2 + segmentLength; + } + } + return null; + } catch (e) { + return null; + } +} + +function isAspectRatioValid(dimensions) { + if (!dimensions || !dimensions.width || !dimensions.height) return true; + const { width, height } = dimensions; + const ratio = Math.max(width / height, height / width); + return ratio <= MAX_ASPECT_RATIO; +} + /** * Invokes the Bedrock AI model to generate alt text for a given image. @@ -492,6 +528,8 @@ async function startProcess() { logger.info(`Filename: ${filebasename} | imageObjects: ${imageObjects}`); logger.info(`Filename: ${filebasename} | Total images to process: ${imageObjects.length}`); + let skippedCount = 0; + for (const imageObject of imageObjects) { try { const getObjectParams = { @@ -502,7 +540,7 @@ async function startProcess() { logger.info(`Filename: ${filebasename} | Image Object Bucketname: ${bucketName}`); const command = new GetObjectCommand(getObjectParams); const { Body } = await s3Client.send(command); - + // Stream the body contents to a buffer const chunks = []; await pipeline(Body, async function* (source) { @@ -511,6 +549,16 @@ async function startProcess() { } }); const fileBuffer = Buffer.concat(chunks); + + const dimensions = getImageDimensions(fileBuffer); + if (dimensions && !isAspectRatioValid(dimensions)) { + skippedCount++; + const altText = "Decorative element"; + combinedResults[imageObject.id] = altText; + logger.info(`Filename: ${filebasename} | Skipping image ${imageObject.id} - aspect ratio ${Math.max(dimensions.width/dimensions.height, dimensions.height/dimensions.width).toFixed(1)}:1 exceeds ${MAX_ASPECT_RATIO}:1 limit (${dimensions.width}x${dimensions.height})`); + continue; + } + const localFilePath = path.join(__dirname, `${imageObject.path.split('/').pop()}`); logger.info(`Filename: ${filebasename} | Local File Path: ${localFilePath}`); fs_1.writeFileSync(localFilePath, fileBuffer); @@ -519,23 +567,23 @@ async function startProcess() { logger.info(`Filename: ${filebasename} | Response:${response}`); Object.assign(combinedResults, JSON.parse(response)); successCount++; - logger.info(`Filename: ${filebasename} | Alt text generation succeeded for image ${imageObject.id} (${successCount} succeeded, ${failureCount} failed)`); + logger.info(`Filename: ${filebasename} | Alt text generation succeeded for image ${imageObject.id} (${successCount} succeeded, ${failureCount} failed, ${skippedCount} skipped)`); } catch (error) { failureCount++; logger.error(`Filename: ${filebasename} | Alt text generation failed for image ${imageObject.id}: ${error.message || error}`); - logger.info(`Filename: ${filebasename} | Progress: ${successCount} succeeded, ${failureCount} failed`); + logger.info(`Filename: ${filebasename} | Progress: ${successCount} succeeded, ${failureCount} failed, ${skippedCount} skipped`); } await sleep(2000); } - // Check if we have any images and if all of them failed - if (imageObjects.length > 0 && successCount === 0) { + // Only fail if all images failed AND none were skipped (skipped images got default alt text) + if (imageObjects.length > 0 && successCount === 0 && skippedCount === 0) { logger.error(`Filename: ${filebasename} | All ${failureCount} alt text generation requests failed - likely due to throttling or Bedrock API issues`); logger.error(`File: ${filebasename}, Status: Failed in second ECS task - All Bedrock requests failed`); process.exit(1); } - logger.info(`Filename: ${filebasename} | Alt text generation complete: ${successCount} succeeded, ${failureCount} failed out of ${imageObjects.length} images`); + logger.info(`Filename: ${filebasename} | Alt text generation complete: ${successCount} succeeded, ${failureCount} failed, ${skippedCount} skipped (bad aspect ratio) out of ${imageObjects.length} images`); let defaultText = "No text available";