fsspec · martindurant · Apr 15, 2026 · Apr 11, 2026 · Apr 12, 2026 · Apr 13, 2026
diff --git a/qtapp/main.py b/qtapp/main.py
@@ -272,7 +272,9 @@ def _show_project_details(self, project_url: str, highlight_key: str = ""):
         if proj is None:
             return
         basename = project_url.split("/")[-1] or project_url
-        html = get_details_html(basename, project_url, proj.to_dict(), highlight_key)
+        html = get_details_html(
+            basename, project_url, proj.to_dict(compact=False), highlight_key
+        )
         self.detail.setHtml(html)
 
     def _on_detail_message(self, msg: dict):
@@ -335,7 +337,9 @@ def __init__(self, parent=None):
 
     def refresh(self, scroll_to: str | None = None):
         """Re-render the library HTML panel."""
-        data = {url: proj.to_dict() for url, proj in library.entries.items()}
+        data = {
+            url: proj.to_dict(compact=False) for url, proj in library.entries.items()
+        }
         info_data = class_infos()
         spec_names = list(info_data.get("specs", {}).keys())
         html = get_library_html(data, spec_names, scroll_to_project_url=scroll_to)

diff --git a/qtapp/views.py b/qtapp/views.py
@@ -163,6 +163,16 @@ def _get_info_data() -> dict:
         border-radius: 50%; animation: spin 0.7s linear infinite; opacity: 0.8;
     }
     @keyframes spin { to { transform: rotate(360deg); } }
+
+    .html-preview {
+        display: block;
+        width: 100%;
+        border: none;
+        margin-top: 4px;
+        margin-left: 20px;
+        min-height: 40px;
+        max-height: 600px;
+    }
 """
 
 _INFO_POPUP_JS = """
@@ -815,7 +825,7 @@ def get_library_html(
 # Details panel
 # ---------------------------------------------------------------------------
 
-_SKIP_KEYS = {"klass", "proc", "storage_options", "children", "url"}
+_SKIP_KEYS = {"klass", "proc", "storage_options", "children", "url", "_html"}
 
 
 def _build_tooltip(doc, link):
@@ -1002,6 +1012,13 @@ def scalar_label(v):
                 "children": children or None,
                 "infoData": node_info_data,
                 "itemType": role if role not in ("none", "field") else None,
+                "htmlContent": (
+                    value["_html"]
+                    if role == "content"
+                    and isinstance(value, dict)
+                    and isinstance(value.get("_html"), str)
+                    else None
+                ),
             }
         )
 
@@ -1017,6 +1034,52 @@ def _is_leaf_artifact(node: dict) -> bool:
     return not any(c.get("role") == "artifact" for c in children)
 
 
+# Dark console-green stylesheet that _render_detail_node prepends to every html-preview iframe srcdoc.
+_HTML_PREVIEW_CSS = (
+    "<style>"
+    ":root{--bg:#0d1117;--bg-hd:#161b22;--bg-alt:#111820;--grn:#39d353;--grn-d:#26a641;"
+    "--grn-m:#196127;--bd:#21262d;--bd-d:#161b22;--fg:#c9d1d9;--fg-d:#8b949e;"
+    "--bb:#1f6feb;--bg2:#30363d;--fn:ui-monospace,'Cascadia Code','Fira Mono',monospace}"
+    "*{box-sizing:border-box}"
+    "body{background:var(--bg);color:var(--fg);margin:0;font-family:var(--fn);font-size:12px}"
+    ".ps-data-card{border:1px solid var(--bd);border-radius:6px;overflow:hidden;"
+    "background:var(--bg);color:var(--fg)}"
+    ".ps-data-card-header{background:var(--bg-hd);padding:7px 12px;display:flex;"
+    "align-items:center;gap:8px;border-bottom:1px solid var(--bd)}"
+    ".ps-data-card-header .ps-icon{font-size:16px}"
+    ".ps-data-card-header .ps-name{font-weight:bold;font-size:13px;color:var(--grn)}"
+    ".ps-data-card-header .ps-badge{background:var(--bb);color:#fff;border-radius:10px;"
+    "padding:1px 7px;font-size:10px}"
+    ".ps-data-card-header .ps-badge-gray{background:var(--bg2);color:var(--fg);"
+    "border-radius:10px;padding:1px 7px;font-size:10px}"
+    ".ps-data-meta{padding:8px 12px;border-bottom:1px solid var(--bd-d)}"
+    ".ps-data-meta table{border-collapse:collapse;width:100%}"
+    ".ps-data-meta td{padding:2px 8px 2px 0;vertical-align:top}"
+    ".ps-data-meta td:first-child{color:var(--fg-d);white-space:nowrap;width:110px}"
+    "details>summary{list-style:none;cursor:pointer;color:var(--grn-d);font-size:11px;margin-top:4px}"
+    "details>summary::-webkit-details-marker{display:none}"
+    ".ps-schema-table{font-size:11px;border-collapse:collapse;margin-top:4px;width:100%}"
+    ".ps-schema-table th{background:var(--bg-hd);color:var(--grn-d);padding:2px 8px;"
+    "text-align:left;border:1px solid var(--bd)}"
+    ".ps-schema-table td{padding:2px 8px;border:1px solid var(--bd-d);font-family:var(--fn);color:var(--fg)}"
+    ".ps-schema-table td strong{color:var(--grn)}"
+    ".ps-preview{padding:8px 12px}"
+    ".ps-preview-title{font-weight:bold;font-size:10px;color:var(--grn-m);margin-bottom:5px;"
+    "text-transform:uppercase;letter-spacing:.8px}"
+    ".ps-df-wrap{overflow-x:auto}"
+    ".ps-df-wrap table,.dataframe{font-size:11px!important;border-collapse:collapse!important;"
+    "width:100%!important;color:var(--fg)!important;background:var(--bg)!important}"
+    ".ps-df-wrap th,.dataframe thead th{background:var(--bg-hd)!important;color:var(--grn-d)!important;"
+    "padding:3px 10px!important;border:1px solid var(--bd)!important;white-space:nowrap;text-align:left!important}"
+    ".ps-df-wrap td,.dataframe tbody td{padding:2px 10px!important;border:1px solid var(--bd-d)!important;"
+    "color:var(--fg)!important;background:var(--bg)!important;white-space:nowrap;"
+    "max-width:200px;overflow:hidden;text-overflow:ellipsis}"
+    ".dataframe tbody tr:nth-child(even) td{background:var(--bg-alt)!important}"
+    ".ps-img-preview{max-width:100%;max-height:200px;border-radius:4px}"
+    "</style>"
+)
+
+
 def _render_detail_node(node: dict, depth: int) -> str:
     has_children = bool(node.get("children"))
     can_make = _is_leaf_artifact(node)
@@ -1067,6 +1130,19 @@ def _render_detail_node(node: dict, depth: int) -> str:
             f'<ul class="tree-children" data-depth="{depth + 1}">{inner}</ul>'
         )
 
+    html_content = node.get("htmlContent")
+    if html_content:
+        srcdoc = (
+            (_HTML_PREVIEW_CSS + html_content)
+            .replace("&", "&amp;")
+            .replace('"', "&quot;")
+            .replace("<", "&lt;")
+            .replace(">", "&gt;")
+        )
+        html_preview = f'<iframe class="html-preview" sandbox="allow-scripts" srcdoc="{srcdoc}"></iframe>'
+    else:
+        html_preview = ""
+
     make_btn = (
         f'<button class="make-button" data-item="{node_data}" title="Make artifact">Make</button>'
         if can_make
@@ -1085,6 +1161,7 @@ def _render_detail_node(node: dict, depth: int) -> str:
             {make_btn}
             {info_btn}
         </div>
+        {html_preview}
         {children_html}
     </li>"""
 

diff --git a/src/projspec/__main__.py b/src/projspec/__main__.py
@@ -89,16 +89,30 @@ def version():
     help="List of spec types to ignore (comma-separated list in camel or snake case)",
 )
 @click.option(
-    "--json-out", is_flag=True, default=False, help="JSON output, for projects only"
+    "--json-out",
+    is_flag=True,
+    default=False,
+    help="JSON output, for projects only",
 )
 @click.option(
-    "--html-out", is_flag=True, default=False, help="HTML output, for projects only"
+    "--html-out",
+    is_flag=True,
+    default=False,
+    help="HTML output, for projects only",
 )
 @click.option("--walk", is_flag=True, help="To descend into all child directories")
 @click.option("--summary", is_flag=True, help="Show abbreviated output")
 @click.option("--library", is_flag=True, help="Add to library")
 def scan(
-    path, storage_options, types, xtypes, json_out, html_out, walk, summary, library
+    path,
+    storage_options,
+    types,
+    xtypes,
+    json_out,
+    html_out,
+    walk,
+    summary,
+    library,
 ):
     """Scan the given path for projects, and display
 
@@ -109,13 +123,17 @@ def scan(
     else:
         types = types.split(",")
     proj = projspec.Project(
-        path, storage_options=storage_options, types=types, xtypes=xtypes, walk=walk
+        path,
+        storage_options=storage_options,
+        types=types,
+        xtypes=xtypes,
+        walk=walk,
     )
     if summary:
         print(proj.text_summary())
     else:
         if json_out:
-            print(json.dumps(proj.to_dict(compact=True)))
+            print(json.dumps(proj.to_dict(compact=False)))
         elif html_out:
             print(proj._repr_html_())
         else:
@@ -199,14 +217,21 @@ def library():
 
 @library.command("list")
 @click.option(
-    "--json-out", is_flag=True, default=False, help="JSON output, for projects only"
+    "--json-out",
+    is_flag=True,
+    default=False,
+    help="JSON output, for projects only",
 )
 def list(json_out):
     from projspec.library import ProjectLibrary
 
     library = ProjectLibrary()
     if json_out:
-        print(json.dumps({k: v.to_dict() for k, v in library.entries.items()}))
+        print(
+            json.dumps(
+                {k: v.to_dict(compact=False) for k, v in library.entries.items()}
+            )
+        )
     else:
         for url in sorted(library.entries):
             proj = library.entries[url]

diff --git a/src/projspec/content/data.py b/src/projspec/content/data.py
@@ -1,4 +1,5 @@
 """Contents specifying datasets"""
+
 from dataclasses import dataclass, field
 
 from projspec.content import BaseContent
@@ -33,3 +34,74 @@ class IntakeSource(BaseContent):
 
     # TODO: add better fields: args, driver/reader, metadata, description
     name: str
+
+
+@dataclass
+class DataResource(BaseContent):
+    """A data resource found inside a data-only directory.
+
+    Describes one logical dataset — which may be a flat collection of files, a
+    Hive-partitioned tree, an Iceberg/Delta table, a Zarr store, or any other
+    recognised on-disk layout.
+
+    The ``path`` field is a human-readable basename that identifies the resource:
+
+    - Single file: ``"data.csv"``
+    - Multi-file series: ``"part*.parquet"`` (glob-style, common prefix + ``*`` + ext)
+    - Directory-as-dataset (Hive partition, Zarr store, …): ``"year=2024/"``
+
+    The ``modality`` field classifies the broad nature of the data using the
+    vocabulary established by intake's ``structure`` tags and napari's layer
+    type system:
+
+    - ``"tabular"``    — row/column data (CSV, Parquet, ORC, Excel, …)
+    - ``"array"``      — N-dimensional arrays (NumPy, HDF5, NetCDF, Zarr, …)
+    - ``"image"``      — 2-D/3-D images (PNG, JPEG, TIFF, DICOM, NIfTI, …)
+    - ``"timeseries"`` — time-indexed signals (WAV, GRIB, …)
+    - ``"geospatial"`` — vector/raster geodata (Shapefile, GeoJSON, GeoTIFF, …)
+    - ``"model"``      — ML model weights (GGUF, SafeTensors, PyTorch, …)
+    - ``"nested"``     — hierarchical / JSON-like (Avro, YAML, XML, …)
+    - ``"document"``   — human-readable documents (PDF, DOCX, …)
+    - ``"video"``      — video streams (MP4, AVI, …)
+    - ``"archive"``    — compressed bundles (ZIP, tar.gz, …)
+    - ``""``           — unknown / mixed
+
+    The ``schema`` field is format-specific:
+
+    - Tabular (Parquet, Arrow, CSV, …): ``{column_name: dtype_str, …}``
+    - Image / array: ``{"width": int, "height": int, "channels": int, "mode": str}``
+    - Audio: ``{"sample_rate": int, "channels": int, "frames": int}``
+    - HDF5 / Zarr / NetCDF: ``{"variables": [...], "dims": {...}, "attrs": {...}}``
+    - Unknown / library not available: ``{}``
+    """
+
+    path: str  # basename (or glob pattern / dir/ ) identifying this resource
+    format: str  # canonical format string, e.g. "parquet", "csv", "png", "hdf5"
+    modality: str = ""  # broad data nature; see docstring for vocabulary
+    layout: str = ""  # "flat"|"hive"|"iceberg"|"delta"|"zarr_store"|"tiledarray"|""
+    file_count: int = 0
+    total_size: int = 0  # bytes; 0 when unknown (e.g. remote FS without size info)
+    schema: dict | list = field(default_factory=dict)  # format-specific, see docstring; NOTE(review): list form is not described there
+    # full path to one representative file, for use by preview loaders
+    sample_path: str = ""
+    metadata: dict = field(default_factory=dict)  # catch-all extras
+    _html = None  # un-annotated, so not a dataclass field; holds the HTML cached by _repr_html_()
+
+    def __repr__(self) -> str:
+        from projspec.content.data_html import repr_text
+
+        return repr_text(self)
+
+    def _repr_html_(self) -> str:
+        """Jupyter rich display — returns cached HTML, rendering on first call."""
+        if self._html is None:
+            from projspec.content.data_html import repr_html
+
+            self._html = repr_html(self)  # cached on the instance; nothing in this class resets it if fields change
+        return self._html
+
+    def to_dict(self, compact=False):
+        d = super().to_dict(compact=compact)
+        if not compact:  # only the full (non-compact) form embeds the rendered HTML
+            d["_html"] = self._repr_html_()
+        return d