@@ -10,21 +10,15 @@ import { type NextRequest, NextResponse } from 'next/server'
1010import { fileParseContract } from '@/lib/api/contracts/storage-transfer'
1111import { getValidationErrorMessage , parseRequest } from '@/lib/api/server'
1212import { checkInternalAuth } from '@/lib/auth/hybrid'
13- import {
14- secureFetchWithPinnedIP ,
15- validateUrlWithDNS ,
16- } from '@/lib/core/security/input-validation.server'
1713import { sanitizeUrlForLog } from '@/lib/core/utils/logging'
18- import {
19- assertKnownSizeWithinLimit ,
20- DEFAULT_MAX_ERROR_BODY_BYTES ,
21- isPayloadSizeLimitError ,
22- readResponseTextWithLimit ,
23- readResponseToBufferWithLimit ,
24- } from '@/lib/core/utils/stream-limits'
14+ import { assertKnownSizeWithinLimit , isPayloadSizeLimitError } from '@/lib/core/utils/stream-limits'
2515import { isSupportedFileType , parseFile } from '@/lib/file-parsers'
2616import { isUsingCloudStorage , type StorageContext , StorageService } from '@/lib/uploads'
2717import { uploadExecutionFile } from '@/lib/uploads/contexts/execution'
18+ import {
19+ ExternalUrlValidationError ,
20+ fetchExternalUrlToWorkspace ,
21+ } from '@/lib/uploads/contexts/workspace'
2822import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server'
2923import { getFileMetadataByKey } from '@/lib/uploads/server/metadata'
3024import {
@@ -36,7 +30,6 @@ import {
3630 inferContextFromKey ,
3731 isInternalFileUrl ,
3832} from '@/lib/uploads/utils/file-utils'
39- import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
4033import { verifyFileAccess } from '@/app/api/files/authorization'
4134import type { UserFile } from '@/executor/types'
4235import '@/lib/uploads/core/setup.server'
@@ -453,9 +446,16 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string
453446}
454447
455448/**
456- * Handle external URL
457- * If workspaceId is provided, checks if file already exists and saves to workspace if not
458- * If executionContext is provided, also stores the file in execution storage and returns UserFile
449+ * Handle external URL.
450+ *
451+ * Always fetches the URL fresh — there is no filename-based dedup. Distinct URLs
452+ * commonly share a path tail (e.g. every Slack clipboard paste is `image.png`),
453+ * so keying a cache by filename returns stale bytes. `fetchExternalUrlToWorkspace`
454+ * delegates to `uploadWorkspaceFile`, which suffix-disambiguates collisions on save.
455+ *
456+ * Workspace save is skipped when the URL already points at our execution-files
457+ * bucket (re-uploading our own bytes is wasteful and would generate `image (1).png`
458+ * style aliases for files we already own).
459459 */
460460async function handleExternalUrl (
461461 url : string ,
@@ -470,23 +470,6 @@ async function handleExternalUrl(
470470) : Promise < ParseResult > {
471471 try {
472472 logger . info ( 'Fetching external URL:' , url )
473- logger . info ( 'WorkspaceId for URL save:' , workspaceId )
474-
475- const urlValidation = await validateUrlWithDNS ( url , 'fileUrl' )
476- if ( ! urlValidation . isValid ) {
477- logger . warn ( `Blocked external URL request: ${ urlValidation . error } ` )
478- return {
479- success : false ,
480- error : urlValidation . error || 'Invalid external URL' ,
481- filePath : url ,
482- }
483- }
484-
485- const urlPath = new URL ( url ) . pathname
486- const filename = urlPath . split ( '/' ) . pop ( ) || 'download'
487- const extension = path . extname ( filename ) . toLowerCase ( ) . substring ( 1 )
488-
489- logger . info ( `Extracted filename: ${ filename } , workspaceId: ${ workspaceId } ` )
490473
491474 const {
492475 S3_EXECUTION_FILES_CONFIG ,
@@ -511,104 +494,27 @@ async function handleExternalUrl(
511494 isExecutionFile = false
512495 }
513496
514- // Only apply workspace deduplication if:
515- // 1. WorkspaceId is provided
516- // 2. URL is NOT from execution files bucket/container
517- const shouldCheckWorkspace = workspaceId && ! isExecutionFile
518-
519- if ( shouldCheckWorkspace ) {
520- const permission = await getUserEntityPermissions ( userId , 'workspace' , workspaceId )
521- if ( permission === null ) {
522- logger . warn ( 'User does not have workspace access for file parse' , {
523- userId,
524- workspaceId,
525- filename,
526- } )
527- return {
528- success : false ,
529- error : 'File not found' ,
530- filePath : url ,
531- }
532- }
533-
534- const { fileExistsInWorkspace, listWorkspaceFiles } = await import (
535- '@/lib/uploads/contexts/workspace'
536- )
537- const exists = await fileExistsInWorkspace ( workspaceId , filename )
538-
539- if ( exists ) {
540- logger . info ( `File ${ filename } already exists in workspace, using existing file` )
541- const workspaceFiles = await listWorkspaceFiles ( workspaceId )
542- const existingFile = workspaceFiles . find ( ( f ) => f . name === filename )
543-
544- if ( existingFile ) {
545- const storageFilePath = `/api/files/serve/${ existingFile . key } `
546- return handleCloudFile (
547- storageFilePath ,
548- fileType ,
549- 'workspace' ,
550- userId ,
551- executionContext ,
552- maxDownloadBytes ,
553- maxParsedOutputBytes
554- )
555- }
556- }
557- }
558-
559- const response = await secureFetchWithPinnedIP ( url , urlValidation . resolvedIP ! , {
560- timeout : DOWNLOAD_TIMEOUT_MS ,
561- maxResponseBytes : maxDownloadBytes ,
562- signal,
563- ...( headers && Object . keys ( headers ) . length > 0 && { headers } ) ,
564- } )
565- if ( ! response . ok ) {
566- await readResponseTextWithLimit ( response , {
567- maxBytes : DEFAULT_MAX_ERROR_BODY_BYTES ,
568- label : 'file download error response' ,
569- signal,
570- } ) . catch ( ( ) => '' )
571- throw new Error ( `Failed to fetch URL: ${ response . status } ${ response . statusText } ` )
572- }
573-
574- const buffer = await readResponseToBufferWithLimit ( response , {
575- maxBytes : maxDownloadBytes ,
576- label : 'file download' ,
497+ const { filename, buffer, mimeType } = await fetchExternalUrlToWorkspace ( {
498+ url,
499+ userId,
500+ workspaceId : workspaceId || undefined ,
501+ saveToWorkspace : Boolean ( workspaceId ) && ! isExecutionFile ,
502+ headers,
577503 signal,
504+ maxDownloadBytes,
505+ timeoutMs : DOWNLOAD_TIMEOUT_MS ,
578506 } )
507+ const extension = path . extname ( filename ) . toLowerCase ( ) . substring ( 1 )
579508
580509 logger . info ( `Downloaded file from URL: ${ url } , size: ${ buffer . length } bytes` )
581510
582511 let userFile : UserFile | undefined
583- const mimeType = response . headers . get ( 'content-type' ) || getMimeTypeFromExtension ( extension )
584-
585512 if ( executionContext ) {
586513 try {
587514 userFile = await uploadExecutionFile ( executionContext , buffer , filename , mimeType , userId )
588515 logger . info ( `Stored file in execution storage: ${ filename } ` , { key : userFile . key } )
589516 } catch ( uploadError ) {
590- logger . warn ( `Failed to store file in execution storage:` , uploadError )
591- // Continue without userFile - parsing can still work
592- }
593- }
594-
595- if ( shouldCheckWorkspace ) {
596- try {
597- const permission = await getUserEntityPermissions ( userId , 'workspace' , workspaceId )
598- if ( permission !== 'admin' && permission !== 'write' ) {
599- logger . warn ( 'User does not have write permission for workspace file save' , {
600- userId,
601- workspaceId,
602- filename,
603- permission,
604- } )
605- } else {
606- const { uploadWorkspaceFile } = await import ( '@/lib/uploads/contexts/workspace' )
607- await uploadWorkspaceFile ( workspaceId , userId , buffer , filename , mimeType )
608- logger . info ( `Saved URL file to workspace storage: ${ filename } ` )
609- }
610- } catch ( saveError ) {
611- logger . warn ( `Failed to save URL file to workspace:` , saveError )
517+ logger . warn ( 'Failed to store file in execution storage:' , uploadError )
612518 }
613519 }
614520
@@ -657,6 +563,15 @@ async function handleExternalUrl(
657563 }
658564 }
659565
566+ if ( error instanceof ExternalUrlValidationError ) {
567+ logger . warn ( `Blocked external URL request: ${ error . message } ` )
568+ return {
569+ success : false ,
570+ error : error . message ,
571+ filePath : url ,
572+ }
573+ }
574+
660575 return {
661576 success : false ,
662577 error : `Error fetching URL: ${ ( error as Error ) . message } ` ,
0 commit comments