diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java index 0c38f414e0a..0fefb66ab62 100644 --- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java +++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java @@ -152,9 +152,16 @@ protected Collection getCopyableFilesImpl(CopyConfiguratio Path filePathRelativeToSearchPath = PathUtils.relativizePath(file.getPath(), replacedPrefix); Path thisTargetPath = new Path(replacingPrefix, filePathRelativeToSearchPath); + // Use the file's parent as the starting point for ancestor permission resolution, unless the + // parent is above replacedPrefix (happens when the source root is empty and FileListUtils + // returns the root directory itself). In that case use the file's own path so the walk + // terminates immediately with an empty ancestors list. + Path parentPath = file.getPath().getParent(); + Path ancestorFromPath = PathUtils.isAncestor(replacedPrefix, parentPath) ? 
parentPath : file.getPath(); + + if (this.useNewPreserveLogic) { ancestorOwnerAndPermissions.putAll(CopyableFile - .resolveReplicatedAncestorOwnerAndPermissionsRecursively(this.fs, file.getPath().getParent(), + .resolveReplicatedAncestorOwnerAndPermissionsRecursively(this.fs, ancestorFromPath, replacedPrefix, configuration)); } @@ -163,7 +170,7 @@ protected Collection getCopyableFilesImpl(CopyConfiguratio .fileSet(datasetURN()) .datasetOutputPath(thisTargetPath.toString()) .ancestorsOwnerAndPermission(CopyableFile - .resolveReplicatedOwnerAndPermissionsRecursively(this.fs, file.getPath().getParent(), + .resolveReplicatedOwnerAndPermissionsRecursively(this.fs, ancestorFromPath, replacedPrefix, configuration)) .build(); copyableFile.setFsDatasets(this.fs, targetFs); diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDatasetTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDatasetTest.java index ed87a7c219d..8a5b991cfee 100644 --- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDatasetTest.java +++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDatasetTest.java @@ -253,6 +253,40 @@ public boolean apply(@Nullable CopyEntity copyEntity) { Assert.assertEquals(step.getParentDeletionLimit().get(), target); } + /** + * When source.path points to an empty directory, FileListUtils.listFilesToCopyAtPath (with + * includeEmptyDirectories=true) returns the directory itself as the sole FileStatus entry. + * getCopyableFilesImpl must replicate that directory at the destination without crashing: the + * entry's parent lies above the dataset root, so ancestor resolution has to start at the entry's + * own path. Only the entity count and the source-to-target mapping are asserted here.
+ */ + @Test + public void testEmptySourceDirectoryProducesCopyEntityForDirectory() throws Exception { + Path source = new Path("/source"); + Path target = new Path("/target"); + + // Simulate what FileListUtils returns for an empty directory: the directory itself (isDirectory=true) + FileStatus emptyDirEntry = new FileStatus(0, true, 0, 0, 0, source); + List sourceFiles = Lists.newArrayList(emptyDirEntry); + List targetFiles = Lists.newArrayList(); + + Properties properties = new Properties(); + properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); + RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); + + Collection copyEntities = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), + CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); + + // The empty directory itself should be replicated at the destination (not silently dropped) + Assert.assertEquals(copyEntities.size(), 1, + "Empty source directory should produce one copy entity to replicate the directory at the destination"); + ClassifiedFiles classifiedFiles = classifyFiles(copyEntities); + Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(source), + "The empty directory itself should be the copy origin"); + Assert.assertEquals(classifiedFiles.getPathsToCopy().get(source), target, + "The empty directory should be mapped to the target path"); + } + @Test public void testCorrectComputationOfTargetPathsWhenUsingGlob() throws Exception { Path source = new Path("/source/directory");