diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index ddc2b6f2d..ea13b93f2 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -395,6 +395,9 @@ if validation_df is not None: # Clean up temporary file if os.path.exists(temp_csv): os.unlink(temp_csv) + +# Clean up RecordSet and CurationTask +curation_task.delete(delete_source=True) ``` In this example you would expect to get results like: diff --git a/synapseclient/models/curation.py b/synapseclient/models/curation.py index 6b3eb5843..89107c1d2 100644 --- a/synapseclient/models/curation.py +++ b/synapseclient/models/curation.py @@ -45,7 +45,7 @@ class FileBasedMetadataTaskProperties: A CurationTaskProperties for file-based data, describing where data is uploaded and a view which contains the annotations. - Represents a [Synapse FileBasedMetadataTaskProperties](https://rest-docs.synapse.org/org/sagebionetworks/repo/model/curation/metadata/FileBasedMetadataTaskProperties.html). + Represents a [Synapse FileBasedMetadataTaskProperties](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/curation/metadata/FileBasedMetadataTaskProperties.html). Attributes: upload_folder_id: The synId of the folder where data files of this type are to be uploaded @@ -94,7 +94,7 @@ class RecordBasedMetadataTaskProperties: """ A CurationTaskProperties for record-based metadata. - Represents a [Synapse RecordBasedMetadataTaskProperties](https://rest-docs.synapse.org/org/sagebionetworks/repo/model/curation/metadata/RecordBasedMetadataTaskProperties.html). + Represents a [Synapse RecordBasedMetadataTaskProperties](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/curation/metadata/RecordBasedMetadataTaskProperties.html). 
Attributes: record_set_id: The synId of the RecordSet that will contain all record-based metadata @@ -213,11 +213,18 @@ def get(self, *, synapse_client: Optional[Synapse] = None) -> "CurationTask": """ return self - def delete(self, *, synapse_client: Optional[Synapse] = None) -> None: + def delete( + self, + delete_source: bool = False, + *, + synapse_client: Optional[Synapse] = None, + ) -> None: """ Deletes a CurationTask from Synapse. Arguments: + delete_source: If True, the associated source data (EntityView or RecordSet) will also be deleted + if the task is a FileBasedMetadataTask or RecordBasedMetadataTask respectively. Defaults to False. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -238,6 +245,20 @@ def delete(self, *, synapse_client: Optional[Synapse] = None) -> None: task = CurationTask(task_id=123) task.delete() ``` + + Example: Delete a curation task and its associated data source +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + task = CurationTask(task_id=123) + task.delete(delete_source=True) + ``` """ return None @@ -602,17 +623,26 @@ async def main(): self._set_last_persistent_instance() return self - async def delete_async(self, *, synapse_client: Optional[Synapse] = None) -> None: + async def delete_async( + self, + delete_source: bool = False, + *, + synapse_client: Optional[Synapse] = None, + ) -> None: """ Deletes a CurationTask from Synapse. Arguments: + delete_source: If True, the associated source data (EntityView or RecordSet) will also be deleted + if the task is a FileBasedMetadataTask or RecordBasedMetadataTask respectively. Defaults to False. 
synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. Raises: ValueError: If the CurationTask object does not have a task_id. + ValueError: If delete_source is True but the task properties are not properly set + to identify the source to delete. Example: Delete a curation task asynchronously   @@ -632,6 +662,25 @@ async def main(): asyncio.run(main()) ``` + + Example: Delete a curation task and its associated data source asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + async def main(): + task = CurationTask(task_id=123) + await task.delete_async(delete_source=True) + print("Task and record set deleted successfully") + + asyncio.run(main()) + ``` """ if not self.task_id: raise ValueError("task_id is required to delete a CurationTask") @@ -642,6 +691,41 @@ async def main(): } ) + if delete_source: + if not self.task_properties: + await self.get_async(synapse_client=synapse_client) + + if isinstance(self.task_properties, FileBasedMetadataTaskProperties): + if not self.task_properties.file_view_id: + raise ValueError( + "Cannot delete Fileview: " + "'file_view_id' attribute is missing." + ) + from synapseclient.models import EntityView + + await EntityView(id=self.task_properties.file_view_id).delete_async( + synapse_client=synapse_client + ) + + elif isinstance(self.task_properties, RecordBasedMetadataTaskProperties): + if not self.task_properties.record_set_id: + raise ValueError( + "Cannot delete RecordSet: " + "'record_set_id' attribute is missing." + ) + from synapseclient.models import RecordSet + + await RecordSet(id=self.task_properties.record_set_id).delete_async( + synapse_client=synapse_client + ) + + else: + raise ValueError( + "'task_property' attribute is None. 
" + "Deletion only supports FileBasedMetadataTaskProperties or " + "RecordBasedMetadataTaskProperties." + ) + await delete_curation_task(task_id=self.task_id, synapse_client=synapse_client) async def store_async( diff --git a/tests/integration/synapseclient/models/async/test_curation_async.py b/tests/integration/synapseclient/models/async/test_curation_async.py index a07183e67..14111aa13 100644 --- a/tests/integration/synapseclient/models/async/test_curation_async.py +++ b/tests/integration/synapseclient/models/async/test_curation_async.py @@ -10,6 +10,7 @@ from synapseclient import Synapse from synapseclient.core.exceptions import SynapseHTTPError +from synapseclient.core.utils import make_bogus_uuid_file from synapseclient.models import ( Column, ColumnType, @@ -370,7 +371,7 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup - @pytest.fixture(scope="class") + @pytest.fixture(scope="function") async def folder_with_view( self, project_model: Project, @@ -412,7 +413,35 @@ async def folder_with_view( return folder, entity_view - async def test_delete_curation_task_async( + @pytest.fixture(scope="function") + async def folder_with_record_set( + self, + project_model: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + ) -> tuple[Folder, EntityView]: + """Create a folder with a a record set for record-based testing.""" + # Create a folder + folder = await Folder( + name=str(uuid.uuid4()), + parent_id=project_model.id, + ).store_async(synapse_client=syn) + schedule_for_cleanup(folder.id) + + filename = make_bogus_uuid_file() + schedule_for_cleanup(filename) + + record_set = await RecordSet( + name=str(uuid.uuid4()), + parent_id=folder.id, + path=filename, + upsert_keys=["xxx"], + ).store_async(synapse_client=syn) + schedule_for_cleanup(record_set.id) + + return folder, record_set + + async def test_delete_file_based_curation_task_async( self, 
project_model: Project, folder_with_view: tuple[Folder, EntityView] ) -> None: # GIVEN a project, folder, and entity view @@ -434,13 +463,112 @@ async def test_delete_curation_task_async( task_id = curation_task.task_id assert task_id is not None - # WHEN I delete the task asynchronously - await curation_task.delete_async(synapse_client=self.syn) + # WHEN I delete the task asynchronously, without deleting the file view + await curation_task.delete_async(synapse_client=self.syn, delete_source=False) + + # THEN the task should be deleted and no longer retrievable + with pytest.raises(SynapseHTTPError): + await CurationTask(task_id=task_id).get_async(synapse_client=self.syn) + + # AND the file view should not be deleted + await EntityView(entity_view.id).get_async(synapse_client=self.syn) + + async def test_delete_file_based_curation_task_and_fileview_async( + self, project_model: Project, folder_with_view: tuple[Folder, EntityView] + ) -> None: + # GIVEN a project, folder, and entity view + folder, entity_view = folder_with_view + + # GIVEN an existing curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ) + curation_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Task to be deleted", + task_properties=task_properties, + ).store_async(synapse_client=self.syn) + + task_id = curation_task.task_id + assert task_id is not None + + # WHEN I delete the task and fileview asynchronously + await curation_task.delete_async(synapse_client=self.syn, delete_source=True) + + # THEN the task should be deleted and no longer retrievable + with pytest.raises(SynapseHTTPError): + await CurationTask(task_id=task_id).get_async(synapse_client=self.syn) + + # AND the file view should be deleted and no longer retrievable + with pytest.raises(SynapseHTTPError): + await 
EntityView(entity_view.id).get_async(synapse_client=self.syn) + + async def test_delete_record_based_curation_task_async( + self, project_model: Project, folder_with_record_set: tuple[Folder, EntityView] + ) -> None: + # GIVEN a folder, and record set + _, record_set = folder_with_record_set + + # GIVEN an existing curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = RecordBasedMetadataTaskProperties( + record_set_id=record_set.id, + ) + curation_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Task to be deleted", + task_properties=task_properties, + ).store_async(synapse_client=self.syn) + + task_id = curation_task.task_id + assert task_id is not None + + # WHEN I delete the task asynchronously, without deleting the record set + await curation_task.delete_async(synapse_client=self.syn, delete_source=False) # THEN the task should be deleted and no longer retrievable with pytest.raises(SynapseHTTPError): await CurationTask(task_id=task_id).get_async(synapse_client=self.syn) + # AND the record set should not be deleted + await RecordSet(record_set.id).get_async(synapse_client=self.syn) + + async def test_delete_record_based_curation_task_and_record_set_async( + self, project_model: Project, folder_with_record_set: tuple[Folder, EntityView] + ) -> None: + # GIVEN a folder, and record set + _, record_set = folder_with_record_set + + # GIVEN an existing curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = RecordBasedMetadataTaskProperties( + record_set_id=record_set.id, + ) + curation_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Task to be deleted", + task_properties=task_properties, + ).store_async(synapse_client=self.syn) + + task_id = curation_task.task_id + assert task_id is not None + + # WHEN I delete the task and record set asynchronously + await 
curation_task.delete_async(synapse_client=self.syn, delete_source=True) + + # THEN the task should be deleted and no longer retrievable + with pytest.raises(SynapseHTTPError): + await CurationTask(task_id=task_id).get_async(synapse_client=self.syn) + + # AND the record set should be deleted and not retrievable + with pytest.raises(SynapseHTTPError): + await RecordSet(record_set.id).get_async(synapse_client=self.syn) + async def test_delete_validation_error_async(self) -> None: # GIVEN a CurationTask without a task_id curation_task = CurationTask()