From 3175175a222dea8977dd91c9e33f063f06379581 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 07:50:50 -0800 Subject: [PATCH 01/12] added examples for getting data into a grid --- .../extensions/curator/metadata_curation.md | 109 +++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index ea13b93f2..1aca74998 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -272,7 +272,114 @@ else: print("No validation results available. The Grid session must be exported to generate validation results.") ``` -### Example: Complete validation workflow for animal study metadata +### Example2: Getting data into a Grid for a file-based workflow + +The following example is for file-based curation. It assumes your data is in a CSV file. +It assumes your data is in a CSV file where each column is a property. + +```python +import pandas as pd +from synapseclient import Synapse +from synapseclient.models import CurationTask, Folder, File +from synapseclient.core.utils import make_bogus_data_file +from synapseclient.extensions.curator import create_file_based_metadata_task + +# 1. Replace all these values with your own information +PROJECT_ID = "syn68175188" +FOLDER_NAME = "Patient Curation Folder" +CSV_PATH = "patient.csv" +JSON_SCHEMA_URI = "dpetest-test.schematic.Patient" +CURATION_TASK_NAME = "File-based curation task for patients" +INSTRUCTIONS = "Please curate the patient information." + +# 2. Login to Synapse +syn = Synapse() +syn.login() + +# 3. Get annotations from CSV file +annotations = pd.read_csv(CSV_PATH).to_dict(orient="records")[0] + +# 4. Create a folder store the file that will be used for curation +folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) +folder = folder.store(synapse_client=syn) + +# 5. 
Create a file, annotate it, and store it in Synapse folder +path_to_file = make_bogus_data_file(n=5) +file = File(path=path_to_file, parent_id=folder.id, annotations=annotations) +file = file.store(synapse_client=syn) + +# 6. Create EntityView and CurationTask +view_id, task_id = create_file_based_metadata_task( + folder_id=folder.id, + curation_task_name=CURATION_TASK_NAME, + instructions=INSTRUCTIONS, + schema_uri=JSON_SCHEMA_URI, + synapse_client=syn, +) + +# 7. Cleanup all Synapse entities created +folder.delete(synapse_client=syn) +CurationTask(task_id=task_id).delete(synapse_client=syn, delete_source=True) +``` + +### Example2: Getting data into a Grid for a record-based workflow + +The following example is for record-based curation. +It assumes your data is in a CSV file where each column is a property. + +```python +import pandas as pd + +from synapseclient import Synapse +from synapseclient.models import Folder +from synapseclient.extensions.curator import create_record_based_metadata_task + + +# 1. Replace all these values with your own information +PROJECT_ID = "syn68175188" +FOLDER_NAME = "Patient Curation Folder" +CSV_PATH = "patient.csv" +JSON_SCHEMA_URI = "dpetest-test.schematic.Patient" +CURATION_TASK_NAME = "Record-based curation task for patients" +INSTRUCTIONS = "Please curate the patient information." +RECORD_SET_NAME = "Patient Record Set" +RECORD_SET_DESCRIPTION = "A record set for patients created for a file-based curation task example." +UPSERT_KEYS = ["PatientID"] + +# 2. Login to Synapse +syn = Synapse() +syn.login() + +# 3. Create a folder to store the RecordSet in +folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) +folder = folder.store(synapse_client=syn) + +# 4. 
Create RecordSet, CurationTask, and Grid +record_set, task, grid = create_record_based_metadata_task( + folder_id=folder.id, + record_set_name=RECORD_SET_NAME, + record_set_description=RECORD_SET_DESCRIPTION, + curation_task_name=CURATION_TASK_NAME, + upsert_keys=UPSERT_KEYS, + instructions=INSTRUCTIONS, + schema_uri=JSON_SCHEMA_URI, + synapse_client=syn, +) + +# 5. Store the record set with the path to the CSV file as an annotation. +# TODO: Add functionality to store data in a RecordSet programmatically +# This doesn't store the data in the RecordSet +record_set.get(synapse_client=syn) +record_set.path = CSV_PATH +record_set = record_set.store(synapse_client=syn) + +# 6. Cleanup all Synapse entities created +folder.delete(synapse_client=syn) +record_set.delete(synapse_client=syn, delete_source=True) +grid.delete(synapse_client=syn) +``` + +### Example3: Complete validation workflow for animal study metadata This example demonstrates the full workflow from creating a curation task through validating the submitted metadata: From 21998935b5fe3820a401db5806499d42cfad879d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 08:08:10 -0800 Subject: [PATCH 02/12] clarified todo comment --- docs/guides/extensions/curator/metadata_curation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 1aca74998..c430b8f5d 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -367,8 +367,8 @@ record_set, task, grid = create_record_based_metadata_task( ) # 5. Store the record set with the path to the CSV file as an annotation. 
-# TODO: Add functionality to store data in a RecordSet programmatically -# This doesn't store the data in the RecordSet +# TODO: https://sagebionetworks.jira.com/browse/SYNPY-1781 +# Once SYNPY-1781 is finished add code here for uploading data from a CSV file into a grid session. record_set.get(synapse_client=syn) record_set.path = CSV_PATH record_set = record_set.store(synapse_client=syn) From 3620e392f1f28887258b0a4a1e021a3de7074c0f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 10:04:21 -0800 Subject: [PATCH 03/12] un-number examples --- docs/guides/extensions/curator/metadata_curation.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index c430b8f5d..53fbaced6 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -272,7 +272,7 @@ else: print("No validation results available. The Grid session must be exported to generate validation results.") ``` -### Example2: Getting data into a Grid for a file-based workflow +### Example: Getting data into a Grid for a file-based workflow The following example is for file-based curation. It assumes your data is in a CSV file. It assumes your data is in a CSV file where each column is a property. @@ -322,7 +322,7 @@ folder.delete(synapse_client=syn) CurationTask(task_id=task_id).delete(synapse_client=syn, delete_source=True) ``` -### Example2: Getting data into a Grid for a record-based workflow +### Example: Getting data into a Grid for a record-based workflow The following example is for record-based curation. It assumes your data is in a CSV file where each column is a property. 
@@ -379,7 +379,7 @@ record_set.delete(synapse_client=syn, delete_source=True) grid.delete(synapse_client=syn) ``` -### Example3: Complete validation workflow for animal study metadata +### Example: Complete validation workflow for animal study metadata This example demonstrates the full workflow from creating a curation task through validating the submitted metadata: From 163e24b969b36337cc66bd149f26aead9c45d8b7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 10:05:03 -0800 Subject: [PATCH 04/12] remove redundant sentence --- docs/guides/extensions/curator/metadata_curation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 53fbaced6..cdb413c36 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -274,7 +274,7 @@ else: ### Example: Getting data into a Grid for a file-based workflow -The following example is for file-based curation. It assumes your data is in a CSV file. +The following example is for file-based curation. It assumes your data is in a CSV file where each column is a property. ```python From fa74a188398406e47ac9ec152cfe2536038818f5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 10:12:02 -0800 Subject: [PATCH 05/12] change exmaple to annotate two files --- docs/guides/extensions/curator/metadata_curation.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index cdb413c36..da4e931ec 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -297,15 +297,18 @@ syn = Synapse() syn.login() # 3. 
Get annotations from CSV file -annotations = pd.read_csv(CSV_PATH).to_dict(orient="records")[0] +annotations = pd.read_csv(CSV_PATH).to_dict(orient="records") # 4. Create a folder store the file that will be used for curation folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) folder = folder.store(synapse_client=syn) -# 5. Create a file, annotate it, and store it in Synapse folder -path_to_file = make_bogus_data_file(n=5) -file = File(path=path_to_file, parent_id=folder.id, annotations=annotations) +# 5. Create a files, annotate them, and store them in the Synapse folder +path_to_file1 = make_bogus_data_file(n=5) +file = File(path=path_to_file1, parent_id=folder.id, annotations=annotations[0]) +file = file.store(synapse_client=syn) +path_to_file2 = make_bogus_data_file(n=5) +file = File(path=path_to_file2, parent_id=folder.id, annotations=annotations[1]) file = file.store(synapse_client=syn) # 6. Create EntityView and CurationTask From fc02a36483ba7c568523a20f4796d1c41a7fb82b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 10:12:34 -0800 Subject: [PATCH 06/12] fix example script comment --- docs/guides/extensions/curator/metadata_curation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index da4e931ec..4071b931a 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -299,7 +299,7 @@ syn.login() # 3. Get annotations from CSV file annotations = pd.read_csv(CSV_PATH).to_dict(orient="records") -# 4. Create a folder store the file that will be used for curation +# 4. 
Create a folder to store the file that will be used for curation folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) folder = folder.store(synapse_client=syn) From dbddb310b2843b91d3a795a75c13494114ea0324 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 10:13:15 -0800 Subject: [PATCH 07/12] fix c+p error --- docs/guides/extensions/curator/metadata_curation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 4071b931a..980b58bea 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -346,7 +346,7 @@ JSON_SCHEMA_URI = "dpetest-test.schematic.Patient" CURATION_TASK_NAME = "Record-based curation task for patients" INSTRUCTIONS = "Please curate the patient information." RECORD_SET_NAME = "Patient Record Set" -RECORD_SET_DESCRIPTION = "A record set for patients created for a file-based curation task example." +RECORD_SET_DESCRIPTION = "A record set for patients created for a record-based curation task example." UPSERT_KEYS = ["PatientID"] # 2. Login to Synapse From 81dee71f7559f867a40187aa7a3b1435adacc84b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 11:53:07 -0800 Subject: [PATCH 08/12] changed example to biospecimen, added example csv --- .../extensions/curator/metadata_curation.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 980b58bea..56e0fc691 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -277,8 +277,17 @@ else: The following example is for file-based curation. It assumes your data is in a CSV file where each column is a property. 
+Here is the csv used in the example: + +```csv +SampleID,Component,PatientID,TissueStatus +id1,biospecimen,pid1,Healthy +id1,biospecimen,pid2,Malignant +``` + ```python import pandas as pd +import uuid from synapseclient import Synapse from synapseclient.models import CurationTask, Folder, File from synapseclient.core.utils import make_bogus_data_file @@ -286,11 +295,11 @@ from synapseclient.extensions.curator import create_file_based_metadata_task # 1. Replace all these values with your own information PROJECT_ID = "syn68175188" -FOLDER_NAME = "Patient Curation Folder" -CSV_PATH = "patient.csv" -JSON_SCHEMA_URI = "dpetest-test.schematic.Patient" -CURATION_TASK_NAME = "File-based curation task for patients" -INSTRUCTIONS = "Please curate the patient information." +FOLDER_NAME = f"Biospecimen Curation Folder {uuid.uuid4().hex[:8]}" +CSV_PATH = "biospecimen.csv" +JSON_SCHEMA_URI = "dpetest-test.schematic.Biospecimen" +CURATION_TASK_NAME = "File-based curation task for biospecimens" +INSTRUCTIONS = "Please curate the biospecimen information." # 2. 
Login to Synapse syn = Synapse() From 15f2bfdf37b2fd9b96dd0f202f971020debe7af7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 12:01:53 -0800 Subject: [PATCH 09/12] added example csv --- .../extensions/curator/metadata_curation.md | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 56e0fc691..9eadaccf3 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -298,7 +298,7 @@ PROJECT_ID = "syn68175188" FOLDER_NAME = f"Biospecimen Curation Folder {uuid.uuid4().hex[:8]}" CSV_PATH = "biospecimen.csv" JSON_SCHEMA_URI = "dpetest-test.schematic.Biospecimen" -CURATION_TASK_NAME = "File-based curation task for biospecimens" +CURATION_TASK_NAME = f"File-based curation task for biospecimens {uuid.uuid4().hex[:8]}" INSTRUCTIONS = "Please curate the biospecimen information." # 2. Login to Synapse @@ -339,20 +339,26 @@ CurationTask(task_id=task_id).delete(synapse_client=syn, delete_source=True) The following example is for record-based curation. It assumes your data is in a CSV file where each column is a property. -```python -import pandas as pd +Here is the csv used in the example: +```csv +Sex,Component,Diagnosis,PatientID,CancerType,YearofBirth,FamilyHistory +Male,Patient,Healthy,id1,,1970, +Female,Patient,Healthy,id2,,1980, +``` + +```python +import uuid from synapseclient import Synapse from synapseclient.models import Folder from synapseclient.extensions.curator import create_record_based_metadata_task - # 1. 
Replace all these values with your own information PROJECT_ID = "syn68175188" -FOLDER_NAME = "Patient Curation Folder" +FOLDER_NAME = f"Patient Curation Folder {uuid.uuid4().hex[:8]}" CSV_PATH = "patient.csv" JSON_SCHEMA_URI = "dpetest-test.schematic.Patient" -CURATION_TASK_NAME = "Record-based curation task for patients" +CURATION_TASK_NAME = f"Record-based curation task for patients {uuid.uuid4().hex[:8]}" INSTRUCTIONS = "Please curate the patient information." RECORD_SET_NAME = "Patient Record Set" RECORD_SET_DESCRIPTION = "A record set for patients created for a record-based curation task example." @@ -387,8 +393,7 @@ record_set = record_set.store(synapse_client=syn) # 6. Cleanup all Synapse entities created folder.delete(synapse_client=syn) -record_set.delete(synapse_client=syn, delete_source=True) -grid.delete(synapse_client=syn) +task.delete(synapse_client=syn, delete_source=True) ``` ### Example: Complete validation workflow for animal study metadata From 649837cbc9b334c1d9b90d384bdb2dd20d635dfd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 12:02:34 -0800 Subject: [PATCH 10/12] fixed typo --- docs/guides/extensions/curator/metadata_curation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 9eadaccf3..43f65a5b5 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -312,7 +312,7 @@ annotations = pd.read_csv(CSV_PATH).to_dict(orient="records") folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) folder = folder.store(synapse_client=syn) -# 5. Create a files, annotate them, and store them in the Synapse folder +# 5. 
Create files, annotate them, and store them in the Synapse folder path_to_file1 = make_bogus_data_file(n=5) file = File(path=path_to_file1, parent_id=folder.id, annotations=annotations[0]) file = file.store(synapse_client=syn) From 1ba8b795a05194b378f008e0325ce3fd0692caa6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Mar 2026 12:05:17 -0800 Subject: [PATCH 11/12] randomize entity names --- docs/guides/extensions/curator/metadata_curation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 43f65a5b5..8dc8a8386 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -360,7 +360,7 @@ CSV_PATH = "patient.csv" JSON_SCHEMA_URI = "dpetest-test.schematic.Patient" CURATION_TASK_NAME = f"Record-based curation task for patients {uuid.uuid4().hex[:8]}" INSTRUCTIONS = "Please curate the patient information." -RECORD_SET_NAME = "Patient Record Set" +RECORD_SET_NAME = f"Patient Record Set {uuid.uuid4().hex[:8]}" RECORD_SET_DESCRIPTION = "A record set for patients created for a record-based curation task example." UPSERT_KEYS = ["PatientID"] From eaa71478c29e425d9604cfa51139ad6202538082 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 6 Mar 2026 08:00:25 -0800 Subject: [PATCH 12/12] added print statements for all entities created --- .../extensions/curator/metadata_curation.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index 8dc8a8386..60e3375cf 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -311,14 +311,17 @@ annotations = pd.read_csv(CSV_PATH).to_dict(orient="records") # 4. 
Create a folder to store the file that will be used for curation folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) folder = folder.store(synapse_client=syn) +print(f"Created folder with ID: {folder.id}") # 5. Create files, annotate them, and store them in the Synapse folder path_to_file1 = make_bogus_data_file(n=5) -file = File(path=path_to_file1, parent_id=folder.id, annotations=annotations[0]) -file = file.store(synapse_client=syn) +file1 = File(path=path_to_file1, parent_id=folder.id, annotations=annotations[0]) +file1 = file1.store(synapse_client=syn) +print(f"Created file with ID: {file1.id}") path_to_file2 = make_bogus_data_file(n=5) -file = File(path=path_to_file2, parent_id=folder.id, annotations=annotations[1]) -file = file.store(synapse_client=syn) +file2 = File(path=path_to_file2, parent_id=folder.id, annotations=annotations[1]) +file2 = file2.store(synapse_client=syn) +print(f"Created file with ID: {file2.id}") # 6. Create EntityView and CurationTask view_id, task_id = create_file_based_metadata_task( @@ -328,6 +331,8 @@ view_id, task_id = create_file_based_metadata_task( schema_uri=JSON_SCHEMA_URI, synapse_client=syn, ) +print(f"Created EntityView with ID: {view_id}") +print(f"Created CurationTask with ID: {task_id}") # 7. Cleanup all Synapse entities created folder.delete(synapse_client=syn) @@ -371,6 +376,7 @@ syn.login() # 3. Create a folder to store the RecordSet in folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID) folder = folder.store(synapse_client=syn) +print(f"Created folder with ID: {folder.id}") # 4. Create RecordSet, CurationTask, and Grid record_set, task, grid = create_record_based_metadata_task( @@ -383,6 +389,9 @@ record_set, task, grid = create_record_based_metadata_task( schema_uri=JSON_SCHEMA_URI, synapse_client=syn, ) +print(f"Created RecordSet with ID: {record_set.id}") +print(f"Created CurationTask with ID: {task.task_id}") +print(f"Created Grid with ID: {grid.session_id}") # 5. 
Store the record set with the path to the CSV file as an annotation. # TODO: https://sagebionetworks.jira.com/browse/SYNPY-1781