Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/fairscape_cli/commands/build_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)

from fairscape_models.rocrate import ROCrateV1_2, ROCrateMetadataElem
from fairscape_cli.utils.serialization import prune_none
from fairscape_models.conversion.converter import ROCToTargetConverter
from fairscape_models.conversion.mapping.croissant import MAPPING_CONFIGURATION as CROISSANT_MAPPING

Expand Down Expand Up @@ -535,7 +536,7 @@ def generate_evidence_graph(

# Write the updated metadata back to the file
with open(metadata_file, 'w') as f:
json.dump(metadata, f, indent=2)
json.dump(prune_none(metadata), f, indent=2)

click.echo(f"Added hasEvidenceGraph reference to {ark_id} in RO-Crate metadata")
except Exception as e:
Expand Down Expand Up @@ -774,4 +775,4 @@ def validate_merkle_command(ctx, rocrate_path: pathlib.Path, release: bool):
elif s != c:
click.echo(f" CHANGED: {url}")

ctx.exit(1)
ctx.exit(1)
6 changes: 3 additions & 3 deletions src/fairscape_cli/data_fetcher/GenomicData.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ class Outputs(BaseModel):
from fairscape_cli.data_fetcher.bioproject_fetcher import fetch_bioproject_data

from fairscape_cli.models.rocrate import GenerateROCrate, AppendCrate
from fairscape_cli.utils.serialization import prune_none
from fairscape_cli.models.dataset import GenerateDataset
from fairscape_cli.models.experiment import GenerateExperiment
from fairscape_cli.models.instrument import GenerateInstrument
Expand Down Expand Up @@ -342,7 +343,7 @@ def to_rocrate(
root_dataset_node["hasPart"].append({"@id": entity_id})

f.seek(0)
json.dump(crate_json, f, indent=2)
json.dump(prune_none(crate_json), f, indent=2)
f.truncate()


Expand All @@ -360,7 +361,7 @@ def to_rocrate(

if updated:
f.seek(0)
json.dump(crate_json_final, f, indent=2)
json.dump(prune_none(crate_json_final), f, indent=2)
f.truncate()


Expand Down Expand Up @@ -500,4 +501,3 @@ def from_json(cls, data: dict) -> 'GenomicData':
experiments=Experiments(items=internal_experiments),
outputs=Outputs(items=outputs_list)
)

38 changes: 38 additions & 0 deletions src/fairscape_cli/datasheet_builder/rocrate/datasheet_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ def convert_main_sections(self) -> FairscapeDatasheet:
distribution = distribution_converter.convert()

subcrate_items = self._process_all_subcrates()
if not subcrate_items:
subcrate_items = self._build_single_crate_composition()
composition = CompositionSection(items=subcrate_items) if subcrate_items else None

return FairscapeDatasheet(
Expand Down Expand Up @@ -226,6 +228,42 @@ def _process_all_subcrates(self) -> List[SubCrateItem]:

return subcrate_items

def _build_single_crate_composition(self) -> List[SubCrateItem]:
    """Build a one-item composition from the main crate when it has no subcrates.

    The main crate is run through ``ROCToTargetConverter`` with
    ``SUBCRATE_MAPPING_CONFIGURATION`` so it can be rendered like a subcrate.
    Fields the converter leaves empty (size, DOI, related publications) are
    back-filled from the crate directory and from the second element of the
    crate's metadata graph.

    Returns:
        A single-element list holding the converted item, or an empty list
        if any step raises (the error and traceback are printed).
    """
    try:
        converter = ROCToTargetConverter(
            source_crate=self.main_crate,
            mapping_configuration=SUBCRATE_MAPPING_CONFIGURATION,
            global_index=self.global_metadata_index,
        )
        subcrate_item = converter.convert()
        subcrate_item.published = self.published
        # Left empty on purpose; presumably so no "view full details" link
        # is rendered for the main crate -- confirm against the template.
        subcrate_item.preview_url = ""

        if not subcrate_item.size and self.base_dir.exists():
            try:
                dir_size = get_directory_size(str(self.base_dir))
                subcrate_item.size = format_size(dir_size)
            except Exception:
                # Best effort: a size that cannot be computed is reported
                # as unknown rather than aborting the whole composition.
                subcrate_item.size = "Unknown"

        # Guard the index the same way _enhance_subcrate_item does, so a
        # graph with fewer than two elements does not discard the item.
        graph = self.main_crate.metadataGraph
        main_root = graph[1].model_dump() if len(graph) > 1 else {}
        if not subcrate_item.doi:
            subcrate_item.doi = main_root.get('identifier')
        if not subcrate_item.related_publications:
            pubs = main_root.get('associatedPublication', [])
            if pubs:
                subcrate_item.related_publications = pubs if isinstance(pubs, list) else [pubs]

        self._enhance_subcrate_item(subcrate_item, self.main_crate)

        return [subcrate_item]
    except Exception as e:
        print(f"Error building single crate composition: {e}")
        import traceback
        traceback.print_exc()
        return []

def _enhance_subcrate_item(self, subcrate_item: SubCrateItem, subcrate: ROCrateV1_2):
"""Add statistical summary info to subcrate item if present."""
root_dict = subcrate.metadataGraph[1].model_dump() if len(subcrate.metadataGraph) > 1 else {}
Expand Down
35 changes: 33 additions & 2 deletions src/fairscape_cli/datasheet_builder/rocrate/summary_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from fairscape_models.rocrate import ROCrateV1_2
from fairscape_models.conversion.mapping.AIReady import score_rocrate
from fairscape_models.conversion.models.AIReady import AIReadyScore
from fairscape_cli.utils.serialization import model_dump_pruned


@dataclass
Expand Down Expand Up @@ -89,7 +90,7 @@ def extract_summary_data(self, crate: ROCrateV1_2) -> SummaryData:
formats = []
formats = [f for f in formats if f and f != "unknown"]

return SummaryData(
summary = SummaryData(
name=root_data.get("name", "Unnamed Dataset"),
description=root_data.get("description", ""),
total_size_formatted=size_str,
Expand All @@ -100,6 +101,36 @@ def extract_summary_data(self, crate: ROCrateV1_2) -> SummaryData:
formats=formats
)

# Fallback: compute from graph if evi:* fields are absent (single crate case)
if summary.total_entities == 0:
formats_set = set()
for item in crate.metadataGraph:
if item.guid == "ro-crate-metadata.json":
continue
item_dict = item.model_dump(by_alias=True)
item_type = item_dict.get("@type", "")
type_str = " ".join(item_type) if isinstance(item_type, list) else str(item_type)

if "ROCrate" in type_str or "CreativeWork" in type_str:
continue

summary.total_entities += 1

if "Dataset" in type_str:
summary.dataset_count += 1
fmt = item_dict.get("fileFormat")
if fmt and fmt != "unknown":
formats_set.add(fmt)
elif "Software" in type_str or "SoftwareSourceCode" in type_str:
summary.software_count += 1
elif "Computation" in type_str:
summary.computation_count += 1

if not summary.formats and formats_set:
summary.formats = sorted(formats_set)

return summary

@staticmethod
def _format_size(size_bytes: int) -> str:
"""Format bytes to human-readable size."""
Expand Down Expand Up @@ -160,7 +191,7 @@ def compute_aiready_score(self, crate: ROCrateV1_2) -> Tuple[AIReadyScoreData, A

def save_aiready_score(self, raw_score: AIReadyScore, output_path: Path) -> None:
    """Save the AI-Ready score to a JSON file.

    Args:
        raw_score: Score model to serialize.
        output_path: Destination file; it is opened in write mode, so any
            existing content is replaced.
    """
    # Serialize once via model_dump_pruned (presumably a model_dump that
    # drops None values -- see fairscape_cli.utils.serialization). The
    # earlier plain model_dump() result was overwritten before use.
    score_dict = model_dump_pruned(raw_score)
    with open(output_path, 'w') as f:
        json.dump(score_dict, f, indent=2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,11 @@ <h4>Content Summary</h4>
</div>
</div>
</div>
{% if subcrate.preview_url %}
<div class="view-full-link">
<a href="{{ subcrate.preview_url }}">View Full Dataset Details</a>
</div>
{% endif %}
</div>
{% endfor %} {% else %}
<p>No subcrates found.</p>
Expand Down
5 changes: 3 additions & 2 deletions src/fairscape_cli/entailments/find_outputs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pathlib
import json
from typing import List, Dict, Tuple, Set, Any
from fairscape_cli.utils.serialization import prune_none

def extract_datasets_from_graph(graph: List[Dict]) -> List[Tuple[str, bool]]:
"""
Expand Down Expand Up @@ -169,7 +170,7 @@ def add_inputs_outputs_to_rocrate(rocrate_path: pathlib.Path) -> Tuple[bool, str
metadata["@graph"] = graph

with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
json.dump(prune_none(metadata), f, indent=2)

input_count = len(inputs)
output_count = len(outputs)
Expand All @@ -179,4 +180,4 @@ def add_inputs_outputs_to_rocrate(rocrate_path: pathlib.Path) -> Tuple[bool, str
except json.JSONDecodeError as e:
return False, f"Error parsing JSON: {e}"
except Exception as e:
return False, f"Unexpected error: {e}"
return False, f"Unexpected error: {e}"
5 changes: 3 additions & 2 deletions src/fairscape_cli/entailments/inverse.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pathlib
import json
from fairscape_cli.utils.serialization import prune_none
from typing import List, Tuple, Dict, Any
from rdflib import Graph, URIRef
from rdflib.namespace import OWL
Expand Down Expand Up @@ -192,12 +193,12 @@ def augment_rocrate_with_inverses(
if modified_count > 0:
try:
with open(metadata_file_path, 'w') as f:
json.dump(json_data, f, indent=2, ensure_ascii=False)
json.dump(prune_none(json_data), f, indent=2, ensure_ascii=False)
print(f"RO-Crate '{metadata_file_path}' augmented with inverse properties. {modified_count} modifications made.")
except Exception as e:
print(f"Error saving augmented RO-Crate JSON to {metadata_file_path}: {e}")
return False
else:
print(f"No inverse properties needed to be added or RO-Crate '{metadata_file_path}' is already consistent.")

return True
return True
3 changes: 2 additions & 1 deletion src/fairscape_cli/models/bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import (
Optional
)
from fairscape_cli.utils.serialization import model_dump_pruned


class BagIt(BaseModel):
Expand Down Expand Up @@ -94,7 +95,7 @@ def create_bagit_metadata(self):
bagit_info_path = self.bagit_path / 'bag-info.txt'

with bagit_info_path.open(mode="w") as bag_info_file:
for key, value in self.model_dump(by_alias=True).items():
for key, value in model_dump_pruned(self, by_alias=True).items():
if key != 'bagit_path' and key != 'rocrate_path':
bag_info_file.write('%s: %s\n' % (key, value))

Expand Down
29 changes: 15 additions & 14 deletions src/fairscape_cli/models/rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from fairscape_cli.config import NAAN, DEFAULT_CONTEXT
from fairscape_cli.models.guid_utils import GenerateDatetimeSquid, clean_guid
from fairscape_models.rocrate import ROCrateV1_2, ROCrateMetadataElem, ROCrateMetadataFileElem
from fairscape_cli.utils.serialization import prune_none, model_dump_pruned

def GenerateROCrate(
path: pathlib.Path,
Expand Down Expand Up @@ -80,7 +81,7 @@ def GenerateROCrate(
]}
)

rocrate_dict = rocrate.model_dump(by_alias=True, exclude_none=True)
rocrate_dict = model_dump_pruned(rocrate, by_alias=True)

if 'ro-crate-metadata.json' in str(path):
roCrateMetadataPath = path
Expand All @@ -94,7 +95,7 @@ def GenerateROCrate(
with roCrateMetadataPath.open(mode="w") as metadataFile:
json.dump(rocrate_dict, metadataFile, indent=2)

return root_dataset.model_dump(by_alias=True, exclude_none=True)
return model_dump_pruned(root_dataset, by_alias=True)
class ROCrate(ROCrateMetadataElem):
model_config = ConfigDict(populate_by_name=True)

Expand Down Expand Up @@ -213,7 +214,7 @@ def create_subcrate(

f.seek(0)
f.truncate()
json.dump(rocrate.model_dump(by_alias=True), f, indent=2)
json.dump(model_dump_pruned(rocrate, by_alias=True), f, indent=2)

return subcrate['@id']

Expand Down Expand Up @@ -269,7 +270,7 @@ def initCrate(self):

# Write to file
with ro_crate_metadata_path.open(mode="w") as metadata_file:
json.dump(rocrate_metadata, metadata_file, indent=2)
json.dump(prune_none(rocrate_metadata), metadata_file, indent=2)

def registerObject(self, model: Union[Dataset, Software, Computation]):
"""Add metadata to the graph of an ROCrate"""
Expand All @@ -279,7 +280,7 @@ def registerObject(self, model: Union[Dataset, Software, Computation]):
rocrate_metadata = json.load(rocrate_metadata_file)

# Add to the @graph
model_data = model.model_dump(by_alias=True, exclude_none=True)
model_data = model_dump_pruned(model, by_alias=True)
rocrate_metadata['@graph'].append(model_data)

# Add reference to root dataset's hasPart
Expand All @@ -294,7 +295,7 @@ def registerObject(self, model: Union[Dataset, Software, Computation]):
# Write back to file
rocrate_metadata_file.seek(0)
rocrate_metadata_file.truncate()
json.dump(rocrate_metadata, rocrate_metadata_file, indent=2)
json.dump(prune_none(rocrate_metadata), rocrate_metadata_file, indent=2)

def registerDataset(self, dataset: Dataset):
self.registerObject(dataset)
Expand Down Expand Up @@ -336,7 +337,7 @@ def AppendCrate(
root_dataset['hasPart'] = []

for element in elements:
element_data = element.model_dump(by_alias=True, exclude_none=True)
element_data = model_dump_pruned(element, by_alias=True)
rocrate_metadata['@graph'].append(element_data)
root_dataset['hasPart'].append({"@id": element_data["@id"]})

Expand All @@ -346,7 +347,7 @@ def AppendCrate(
# Write back to file
rocrate_metadata_file.seek(0)
rocrate_metadata_file.truncate()
json.dump(rocrate_metadata, rocrate_metadata_file, indent=2)
json.dump(prune_none(rocrate_metadata), rocrate_metadata_file, indent=2)


def CopyToROCrate(source_filepath: str, destination_filepath: str):
Expand Down Expand Up @@ -385,7 +386,7 @@ def UpdateCrate(
rocrate_metadata = json.load(rocrate_metadata_file)

# Find and replace the element with matching @id
element_data = element.model_dump(by_alias=True, exclude_none=True)
element_data = model_dump_pruned(element, by_alias=True)
for i, existing in enumerate(rocrate_metadata['@graph']):
if existing.get('@id') == element_data['@id']:
rocrate_metadata['@graph'][i] = element_data
Expand All @@ -397,7 +398,7 @@ def UpdateCrate(
# Write back to file
rocrate_metadata_file.seek(0)
rocrate_metadata_file.truncate()
json.dump(rocrate_metadata, rocrate_metadata_file, indent=2)
json.dump(prune_none(rocrate_metadata), rocrate_metadata_file, indent=2)

def LinkSubcrates(parent_crate_path: pathlib.Path) -> List[str]:
parent_metadata_file = parent_crate_path / 'ro-crate-metadata.json'
Expand Down Expand Up @@ -508,7 +509,7 @@ def find_and_process_subcrates(directory: pathlib.Path, base_path: pathlib.Path)
if modified:
subcrate_metadata['@graph'][subcrate_root_index] = subcrate_root
with subcrate_metadata_file.open('w') as f:
json.dump(subcrate_metadata, f, indent=2)
json.dump(prune_none(subcrate_metadata), f, indent=2)

reference_dict = dict(subcrate_root)
relative_path = (subcrate_metadata_file.relative_to(base_path)).as_posix()
Expand All @@ -535,7 +536,7 @@ def find_and_process_subcrates(directory: pathlib.Path, base_path: pathlib.Path)
parent_root_dataset['hasPart'].append({'@id': sub_id})

with parent_metadata_file.open('w') as f:
json.dump(parent_metadata, f, indent=2)
json.dump(prune_none(parent_metadata), f, indent=2)
else:
print("No valid sub-crates found to link.")

Expand Down Expand Up @@ -881,11 +882,11 @@ def UpdateEntitiesInGraph(
return False, f"RO-Crate became invalid after update operations. Details: {e}"

with metadata_filepath.open(mode="w") as metadataFile:
json.dump(validated_crate.model_dump(by_alias=True), metadataFile, indent=2, ensure_ascii=False)
json.dump(model_dump_pruned(validated_crate, by_alias=True), metadataFile, indent=2, ensure_ascii=False)

return True, f"Successfully processed entities. Matched: {matched_count}, Modified: {modified_count}."

except Exception as e:
import traceback
print(f"DEBUG: Unexpected error in UpdateEntitiesInGraph: {traceback.format_exc()}")
return False, f"An unexpected error occurred: {type(e).__name__} - {e}"
return False, f"An unexpected error occurred: {type(e).__name__} - {e}"
4 changes: 2 additions & 2 deletions src/fairscape_cli/models/schema/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
DEFAULT_SCHEMA_TYPE,
NAAN,
)
from fairscape_cli.utils.serialization import model_dump_pruned

class FileType(str, Enum):
CSV = "csv"
Expand Down Expand Up @@ -514,7 +515,7 @@ def from_dict(cls, data: dict) -> 'HDF5ValidationSchema':

def write_schema(schema: TabularValidationSchema, output_file: str):
    """Write a schema to a file as indented JSON.

    Args:
        schema: Schema model to serialize.
        output_file: Path of the file to write; existing content is replaced.
    """
    # Serialize once via model_dump_pruned with aliases (presumably drops
    # None values -- see fairscape_cli.utils.serialization). The earlier
    # schema.to_dict() result was overwritten before use.
    schema_dict = model_dump_pruned(schema, by_alias=True)

    with open(output_file, 'w') as f:
        json.dump(schema_dict, f, indent=2)
Expand Down Expand Up @@ -579,4 +580,3 @@ def ReadSchemaLocal(schemaFile: str) -> TabularValidationSchema:
# load the model into
tabularSchema = TabularValidationSchema.model_validate(schemaJson)
return tabularSchema

Loading
Loading