Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions qtapp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,9 @@ def _show_project_details(self, project_url: str, highlight_key: str = ""):
if proj is None:
return
basename = project_url.split("/")[-1] or project_url
html = get_details_html(basename, project_url, proj.to_dict(), highlight_key)
html = get_details_html(
basename, project_url, proj.to_dict(compact=False), highlight_key
)
self.detail.setHtml(html)

def _on_detail_message(self, msg: dict):
Expand Down Expand Up @@ -335,7 +337,9 @@ def __init__(self, parent=None):

def refresh(self, scroll_to: str | None = None):
"""Re-render the library HTML panel."""
data = {url: proj.to_dict() for url, proj in library.entries.items()}
data = {
url: proj.to_dict(compact=False) for url, proj in library.entries.items()
}
info_data = class_infos()
spec_names = list(info_data.get("specs", {}).keys())
html = get_library_html(data, spec_names, scroll_to_project_url=scroll_to)
Expand Down
79 changes: 78 additions & 1 deletion qtapp/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,16 @@ def _get_info_data() -> dict:
border-radius: 50%; animation: spin 0.7s linear infinite; opacity: 0.8;
}
@keyframes spin { to { transform: rotate(360deg); } }

.html-preview {
display: block;
width: 100%;
border: none;
margin-top: 4px;
margin-left: 20px;
min-height: 40px;
max-height: 600px;
}
"""

_INFO_POPUP_JS = """
Expand Down Expand Up @@ -815,7 +825,7 @@ def get_library_html(
# Details panel
# ---------------------------------------------------------------------------

_SKIP_KEYS = {"klass", "proc", "storage_options", "children", "url"}
_SKIP_KEYS = {"klass", "proc", "storage_options", "children", "url", "_html"}


def _build_tooltip(doc, link):
Expand Down Expand Up @@ -1002,6 +1012,13 @@ def scalar_label(v):
"children": children or None,
"infoData": node_info_data,
"itemType": role if role not in ("none", "field") else None,
"htmlContent": (
value["_html"]
if role == "content"
and isinstance(value, dict)
and isinstance(value.get("_html"), str)
else None
),
}
)

Expand All @@ -1017,6 +1034,52 @@ def _is_leaf_artifact(node: dict) -> bool:
return not any(c.get("role") == "artifact" for c in children)


# Dark console-green stylesheet injected into every html-preview srcdoc.
#
# The value is a single "<style>…</style>" string built by implicit
# concatenation of adjacent literals, so it can be prepended directly to an
# HTML fragment.  Layout of the rules, in order:
#   * ``:root`` declares the colour / font custom properties (--bg, --fg,
#     --grn*, --bd*, --fn, …) that every later rule reads through ``var()``.
#   * ``*`` and ``body`` set box sizing, page background, text colour and font.
#   * ``.ps-data-card*`` / ``.ps-data-meta*`` style the card frame, its header
#     (icon, name, badges) and the metadata table.
#   * ``details>summary`` removes the default disclosure marker.
#   * ``.ps-schema-table*`` styles the schema table.
#   * ``.ps-preview*``, ``.ps-df-wrap*`` and ``.dataframe*`` style the data
#     preview; these rules use ``!important`` throughout.
#     NOTE(review): ``.dataframe`` is presumably the class pandas puts on
#     ``DataFrame.to_html`` output, and ``!important`` presumably exists to
#     beat that markup's own styling — confirm against the HTML producer.
#   * ``.ps-img-preview`` caps the size of image previews.
_HTML_PREVIEW_CSS = (
"<style>"
":root{--bg:#0d1117;--bg-hd:#161b22;--bg-alt:#111820;--grn:#39d353;--grn-d:#26a641;"
"--grn-m:#196127;--bd:#21262d;--bd-d:#161b22;--fg:#c9d1d9;--fg-d:#8b949e;"
"--bb:#1f6feb;--bg2:#30363d;--fn:ui-monospace,'Cascadia Code','Fira Mono',monospace}"
"*{box-sizing:border-box}"
"body{background:var(--bg);color:var(--fg);margin:0;font-family:var(--fn);font-size:12px}"
".ps-data-card{border:1px solid var(--bd);border-radius:6px;overflow:hidden;"
"background:var(--bg);color:var(--fg)}"
".ps-data-card-header{background:var(--bg-hd);padding:7px 12px;display:flex;"
"align-items:center;gap:8px;border-bottom:1px solid var(--bd)}"
".ps-data-card-header .ps-icon{font-size:16px}"
".ps-data-card-header .ps-name{font-weight:bold;font-size:13px;color:var(--grn)}"
".ps-data-card-header .ps-badge{background:var(--bb);color:#fff;border-radius:10px;"
"padding:1px 7px;font-size:10px}"
".ps-data-card-header .ps-badge-gray{background:var(--bg2);color:var(--fg);"
"border-radius:10px;padding:1px 7px;font-size:10px}"
".ps-data-meta{padding:8px 12px;border-bottom:1px solid var(--bd-d)}"
".ps-data-meta table{border-collapse:collapse;width:100%}"
".ps-data-meta td{padding:2px 8px 2px 0;vertical-align:top}"
".ps-data-meta td:first-child{color:var(--fg-d);white-space:nowrap;width:110px}"
"details>summary{list-style:none;cursor:pointer;color:var(--grn-d);font-size:11px;margin-top:4px}"
"details>summary::-webkit-details-marker{display:none}"
".ps-schema-table{font-size:11px;border-collapse:collapse;margin-top:4px;width:100%}"
".ps-schema-table th{background:var(--bg-hd);color:var(--grn-d);padding:2px 8px;"
"text-align:left;border:1px solid var(--bd)}"
".ps-schema-table td{padding:2px 8px;border:1px solid var(--bd-d);font-family:var(--fn);color:var(--fg)}"
".ps-schema-table td strong{color:var(--grn)}"
".ps-preview{padding:8px 12px}"
".ps-preview-title{font-weight:bold;font-size:10px;color:var(--grn-m);margin-bottom:5px;"
"text-transform:uppercase;letter-spacing:.8px}"
".ps-df-wrap{overflow-x:auto}"
".ps-df-wrap table,.dataframe{font-size:11px!important;border-collapse:collapse!important;"
"width:100%!important;color:var(--fg)!important;background:var(--bg)!important}"
".ps-df-wrap th,.dataframe thead th{background:var(--bg-hd)!important;color:var(--grn-d)!important;"
"padding:3px 10px!important;border:1px solid var(--bd)!important;white-space:nowrap;text-align:left!important}"
".ps-df-wrap td,.dataframe tbody td{padding:2px 10px!important;border:1px solid var(--bd-d)!important;"
"color:var(--fg)!important;background:var(--bg)!important;white-space:nowrap;"
"max-width:200px;overflow:hidden;text-overflow:ellipsis}"
".dataframe tbody tr:nth-child(even) td{background:var(--bg-alt)!important}"
".ps-img-preview{max-width:100%;max-height:200px;border-radius:4px}"
"</style>"
)


def _render_detail_node(node: dict, depth: int) -> str:
has_children = bool(node.get("children"))
can_make = _is_leaf_artifact(node)
Expand Down Expand Up @@ -1067,6 +1130,19 @@ def _render_detail_node(node: dict, depth: int) -> str:
f'<ul class="tree-children" data-depth="{depth + 1}">{inner}</ul>'
)

html_content = node.get("htmlContent")
if html_content:
srcdoc = (
(_HTML_PREVIEW_CSS + html_content)
.replace("&", "&amp;")
.replace('"', "&quot;")
.replace("<", "&lt;")
.replace(">", "&gt;")
)
html_preview = f'<iframe class="html-preview" sandbox="allow-scripts" srcdoc="{srcdoc}"></iframe>'
else:
html_preview = ""

make_btn = (
f'<button class="make-button" data-item="{node_data}" title="Make artifact">Make</button>'
if can_make
Expand All @@ -1085,6 +1161,7 @@ def _render_detail_node(node: dict, depth: int) -> str:
{make_btn}
{info_btn}
</div>
{html_preview}
{children_html}
</li>"""

Expand Down
39 changes: 32 additions & 7 deletions src/projspec/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,30 @@ def version():
help="List of spec types to ignore (comma-separated list in camel or snake case)",
)
@click.option(
"--json-out", is_flag=True, default=False, help="JSON output, for projects only"
"--json-out",
is_flag=True,
default=False,
help="JSON output, for projects only",
)
@click.option(
"--html-out", is_flag=True, default=False, help="HTML output, for projects only"
"--html-out",
is_flag=True,
default=False,
help="HTML output, for projects only",
)
@click.option("--walk", is_flag=True, help="To descend into all child directories")
@click.option("--summary", is_flag=True, help="Show abbreviated output")
@click.option("--library", is_flag=True, help="Add to library")
def scan(
path, storage_options, types, xtypes, json_out, html_out, walk, summary, library
path,
storage_options,
types,
xtypes,
json_out,
html_out,
walk,
summary,
library,
):
"""Scan the given path for projects, and display

Expand All @@ -109,13 +123,17 @@ def scan(
else:
types = types.split(",")
proj = projspec.Project(
path, storage_options=storage_options, types=types, xtypes=xtypes, walk=walk
path,
storage_options=storage_options,
types=types,
xtypes=xtypes,
walk=walk,
)
if summary:
print(proj.text_summary())
else:
if json_out:
print(json.dumps(proj.to_dict(compact=True)))
print(json.dumps(proj.to_dict(compact=False)))
elif html_out:
print(proj._repr_html_())
else:
Expand Down Expand Up @@ -199,14 +217,21 @@ def library():

@library.command("list")
@click.option(
"--json-out", is_flag=True, default=False, help="JSON output, for projects only"
"--json-out",
is_flag=True,
default=False,
help="JSON output, for projects only",
)
def list(json_out):
from projspec.library import ProjectLibrary

library = ProjectLibrary()
if json_out:
print(json.dumps({k: v.to_dict() for k, v in library.entries.items()}))
print(
json.dumps(
{k: v.to_dict(compact=False) for k, v in library.entries.items()}
)
)
else:
for url in sorted(library.entries):
proj = library.entries[url]
Expand Down
72 changes: 72 additions & 0 deletions src/projspec/content/data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contents specifying datasets"""

from dataclasses import dataclass, field

from projspec.content import BaseContent
Expand Down Expand Up @@ -33,3 +34,74 @@ class IntakeSource(BaseContent):

# TODO: add better fields: args, driver/reader, metadata, description
name: str


@dataclass
class DataResource(BaseContent):
    """One logical dataset discovered inside a data-only directory.

    The dataset may be backed by a flat collection of files, a
    Hive-partitioned tree, an Iceberg/Delta table, a Zarr store, or any other
    recognised on-disk layout.

    ``path`` is a human-readable basename that identifies the resource:

    - Single file: ``"data.csv"``
    - Multi-file series: ``"part*.parquet"`` (glob-style: common prefix,
      then ``*``, then the extension)
    - Directory-as-dataset (Hive partition, Zarr store, …): ``"year=2024/"``

    ``modality`` classifies the broad nature of the data, using the
    vocabulary established by intake's ``structure`` tags and napari's layer
    type system:

    - ``"tabular"`` — row/column data (CSV, Parquet, ORC, Excel, …)
    - ``"array"`` — N-dimensional arrays (NumPy, HDF5, NetCDF, Zarr, …)
    - ``"image"`` — 2-D/3-D images (PNG, JPEG, TIFF, DICOM, NIfTI, …)
    - ``"timeseries"`` — time-indexed signals (WAV, GRIB, …)
    - ``"geospatial"`` — vector/raster geodata (Shapefile, GeoJSON, GeoTIFF, …)
    - ``"model"`` — ML model weights (GGUF, SafeTensors, PyTorch, …)
    - ``"nested"`` — hierarchical / JSON-like (Avro, YAML, XML, …)
    - ``"document"`` — human-readable documents (PDF, DOCX, …)
    - ``"video"`` — video streams (MP4, AVI, …)
    - ``"archive"`` — compressed bundles (ZIP, tar.gz, …)
    - ``""`` — unknown / mixed

    ``schema`` is format-specific:

    - Tabular (Parquet, Arrow, CSV, …): ``{column_name: dtype_str, …}``
    - Image / array: ``{"width": int, "height": int, "channels": int, "mode": str}``
    - Audio: ``{"sample_rate": int, "channels": int, "frames": int}``
    - HDF5 / Zarr / NetCDF: ``{"variables": [...], "dims": {...}, "attrs": {...}}``
    - Unknown / library not available: ``{}``
    """

    # Basename, glob pattern, or "dir/" that identifies this resource.
    path: str
    # Canonical format string, e.g. "parquet", "csv", "png", "hdf5".
    format: str
    # Broad data nature; the class docstring lists the vocabulary.
    modality: str = ""
    # One of "flat", "hive", "iceberg", "delta", "zarr_store", "tiledarray", "".
    layout: str = ""
    file_count: int = 0
    # Size in bytes; 0 when unknown (e.g. remote FS without size info).
    total_size: int = 0
    schema: dict | list = field(default_factory=dict)
    # Full path to one representative file, for use by preview loaders.
    sample_path: str = ""
    # Catch-all for extra information.
    metadata: dict = field(default_factory=dict)
    # Rendered-HTML cache.  It carries no annotation, so ``@dataclass`` does
    # not turn it into a field; ``_repr_html_`` stores the per-instance value.
    _html = None

    def __repr__(self) -> str:
        """Return the plain-text representation built by ``data_html.repr_text``."""
        # Imported at call time rather than at module import.
        from projspec.content.data_html import repr_text as render_text

        return render_text(self)

    def _repr_html_(self) -> str:
        """Jupyter rich display: render the HTML once, then serve the cached copy."""
        if self._html is not None:
            return self._html

        from projspec.content.data_html import repr_html as render_html

        self._html = render_html(self)
        return self._html

    def to_dict(self, compact=False):
        """Serialise via the parent class; non-compact output also gets ``"_html"``.

        The ``"_html"`` entry holds the (cached) result of ``_repr_html_``.
        """
        result = super().to_dict(compact=compact)
        if compact:
            return result
        result["_html"] = self._repr_html_()
        return result
Loading
Loading