From b7b38fc16bde262fb534111733ed2d2891ee7108 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Apr 2026 16:35:31 -0400 Subject: [PATCH 1/9] clean --- src/projspec/content/data.py | 2 ++ tests/test_data_project.py | 46 ++++-------------------------------- 2 files changed, 7 insertions(+), 41 deletions(-) diff --git a/src/projspec/content/data.py b/src/projspec/content/data.py index 5b46317..2106553 100644 --- a/src/projspec/content/data.py +++ b/src/projspec/content/data.py @@ -92,6 +92,8 @@ def __repr__(self) -> str: def _repr_html_(self) -> str: """Jupyter rich display — returns cached HTML, rendering on first call.""" + # TODO: this is probably not what we want jupyter to dysplay, but it's + # convenient for now. if self._html is None: from projspec.content.data_html import repr_html diff --git a/tests/test_data_project.py b/tests/test_data_project.py index 9f71ff0..3dae345 100644 --- a/tests/test_data_project.py +++ b/tests/test_data_project.py @@ -8,21 +8,11 @@ from projspec.utils import from_dict -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - def _data_project(tmp_path): """Return a projspec.Project rooted at *tmp_path* (no walk needed).""" return projspec.Project(str(tmp_path)) -# --------------------------------------------------------------------------- -# Detection tests -# --------------------------------------------------------------------------- - - class TestDataDetection: def test_csv_detected(self, tmp_path): (tmp_path / "data.csv").write_text("x,y\n1,2\n3,4\n") @@ -45,11 +35,6 @@ def test_no_data_files_not_detected(self, tmp_path): assert "data" not in proj.specs -# --------------------------------------------------------------------------- -# Parse / DataResource field tests -# --------------------------------------------------------------------------- - - class TestDataParse: def test_single_csv_resource(self, 
tmp_path): (tmp_path / "sales.csv").write_text("col1,col2\n1,a\n2,b\n") @@ -98,11 +83,6 @@ def test_total_size_nonzero(self, tmp_path): assert dr.total_size > 0 -# --------------------------------------------------------------------------- -# Serialisation: to_dict -# --------------------------------------------------------------------------- - - class TestDataResourceToDict: def _make_dr(self, tmp_path): (tmp_path / "items.csv").write_text("id,val\n1,a\n2,b\n") @@ -121,11 +101,6 @@ def test_compact_omits_html(self, tmp_path): assert "_html" not in d -# --------------------------------------------------------------------------- -# Serialisation: from_dict round-trip -# --------------------------------------------------------------------------- - - class TestDataResourceRoundTrip: def _roundtrip(self, dr): """Serialise to JSON and rehydrate, returning the new DataResource.""" @@ -211,23 +186,14 @@ def test_roundtrip_html_survives_missing_sample_path(self, tmp_path): assert dr2._repr_html_() == html_original -# --------------------------------------------------------------------------- -# Conditional parse: sentinel / byte-majority logic -# --------------------------------------------------------------------------- - - class TestDataConditionalParse: """Tests for the 'other project types present' guard in Data.parse().""" - # -- helpers -- - def _big_csv(self, path, rows=500): """Write a CSV large enough to dominate byte counts.""" content = "id,value\n" + "\n".join(f"{i},{i * 2}" for i in range(rows)) path.write_text(content) - # -- pure data directories (no sentinels) -- - def test_pure_data_dir_no_sentinel(self, tmp_path): """No sentinel → Data always parsed regardless of byte ratios.""" (tmp_path / "data.csv").write_text("x\n1\n") @@ -248,10 +214,8 @@ def test_dvc_companion_not_a_sentinel(self, tmp_path): proj = _data_project(tmp_path) assert "data" in proj.specs - # -- mixed dirs where data dominates -- - def test_sentinel_present_data_majority(self, tmp_path): 
- """Sentinel present but data files are majority of bytes → Data parsed.""" + """Sentinel is present, but data files are the majority of bytes → Data parsed.""" self._big_csv(tmp_path / "data.csv") # large data file (tmp_path / "pyproject.toml").write_text( "[project]\nname='x'\n" @@ -299,7 +263,7 @@ def test_has_non_data_sentinels_true(self, tmp_path): (tmp_path / "data.csv").write_text("x\n1\n") (tmp_path / "pyproject.toml").write_text("") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") @@ -317,7 +281,7 @@ def test_has_non_data_sentinels_false(self, tmp_path): from projspec.proj.data_dir import Data (tmp_path / "data.csv").write_text("x\n1\n") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") @@ -336,7 +300,7 @@ def test_data_bytes_majority_true(self, tmp_path): self._big_csv(tmp_path / "data.csv") (tmp_path / "small.py").write_text("x = 1\n") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") @@ -351,7 +315,7 @@ def test_data_bytes_majority_false(self, tmp_path): (tmp_path / "main.py").write_text("x = 1\n" * 5000) (tmp_path / "tiny.csv").write_text("a\n1\n") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") From 8702adf35cb8fb559d69d9c7f49f7b28918b259d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Apr 2026 18:39:00 -0400 Subject: [PATCH 2/9] bunch-o-types --- docs/source/api.rst | 80 +++++++++++++++++ src/projspec/artifact/__init__.py | 5 ++ src/projspec/content/__init__.py | 10 +++ src/projspec/content/cicd.py | 48 +++++++++-- src/projspec/proj/__init__.py | 124 ++++++++++++++++++++++---- src/projspec/proj/webapp.py | 139 +++++++++++++++++++++++++++++- tests/test_roundtrips.py 
| 23 +++++ 7 files changed, 401 insertions(+), 28 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index 026c763..acd304d 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -35,11 +35,28 @@ User Classes content.metadata.Licensed proj.ai.AIEnabled proj.briefcase.Briefcase + proj.cicd.GitHubActions + proj.cicd.GitLabCI + proj.cicd.CircleCI + proj.cicd.Taskfile + proj.cicd.JustFile + proj.cicd.Tox proj.conda_package.CondaRecipe proj.conda_package.RattlerRecipe proj.conda_project.CondaProject proj.datapackage.DVCRepo proj.datapackage.DataPackage + proj.dataworkflows.Dbt + proj.dataworkflows.Quarto + proj.dataworkflows.Prefect + proj.dataworkflows.Dagster + proj.dataworkflows.Kedro + proj.dataworkflows.Airflow + proj.dataworkflows.Snakemake + proj.dataworkflows.Nox + proj.docs.MkDocs + proj.docs.Sphinx + proj.docs.Docusaurus proj.documentation.MDBook proj.documentation.RTD proj.git.GitRepo @@ -51,6 +68,21 @@ User Classes proj.ide.NvidiaAIWorkbench proj.ide.VSCode proj.ide.Zed + proj.infra.DockerCompose + proj.infra.Terraform + proj.infra.Ansible + proj.infra.Pulumi + proj.infra.CDK + proj.infra.Earthfile + proj.infra.Nixpacks + proj.infra.Vagrant + proj.jsframeworks.NextJS + proj.jsframeworks.NuxtJS + proj.jsframeworks.SvelteKit + proj.jsframeworks.Vite + proj.jsframeworks.Pnpm + proj.jsframeworks.Bun + proj.jsframeworks.Deno proj.node.JLabExtension proj.node.Node proj.node.Yarn @@ -66,7 +98,9 @@ User Classes proj.uv.Uv proj.uv.UvScript proj.webapp.Django + proj.webapp.Gradio proj.webapp.Marimo + proj.webapp.Shiny proj.webapp.Streamlit proj.workflows.MLFlow @@ -78,11 +112,28 @@ User Classes .. autoclass:: projspec.content.metadata.Licensed .. autoclass:: projspec.proj.ai.AIEnabled .. autoclass:: projspec.proj.briefcase.Briefcase +.. autoclass:: projspec.proj.cicd.GitHubActions +.. autoclass:: projspec.proj.cicd.GitLabCI +.. autoclass:: projspec.proj.cicd.CircleCI +.. autoclass:: projspec.proj.cicd.Taskfile +.. 
autoclass:: projspec.proj.cicd.JustFile +.. autoclass:: projspec.proj.cicd.Tox .. autoclass:: projspec.proj.conda_package.CondaRecipe .. autoclass:: projspec.proj.conda_package.RattlerRecipe .. autoclass:: projspec.proj.conda_project.CondaProject .. autoclass:: projspec.proj.datapackage.DVCRepo .. autoclass:: projspec.proj.datapackage.DataPackage +.. autoclass:: projspec.proj.dataworkflows.Dbt +.. autoclass:: projspec.proj.dataworkflows.Quarto +.. autoclass:: projspec.proj.dataworkflows.Prefect +.. autoclass:: projspec.proj.dataworkflows.Dagster +.. autoclass:: projspec.proj.dataworkflows.Kedro +.. autoclass:: projspec.proj.dataworkflows.Airflow +.. autoclass:: projspec.proj.dataworkflows.Snakemake +.. autoclass:: projspec.proj.dataworkflows.Nox +.. autoclass:: projspec.proj.docs.MkDocs +.. autoclass:: projspec.proj.docs.Sphinx +.. autoclass:: projspec.proj.docs.Docusaurus .. autoclass:: projspec.proj.documentation.MDBook .. autoclass:: projspec.proj.documentation.RTD .. autoclass:: projspec.proj.git.GitRepo @@ -94,6 +145,21 @@ User Classes .. autoclass:: projspec.proj.ide.NvidiaAIWorkbench .. autoclass:: projspec.proj.ide.VSCode .. autoclass:: projspec.proj.ide.Zed +.. autoclass:: projspec.proj.infra.DockerCompose +.. autoclass:: projspec.proj.infra.Terraform +.. autoclass:: projspec.proj.infra.Ansible +.. autoclass:: projspec.proj.infra.Pulumi +.. autoclass:: projspec.proj.infra.CDK +.. autoclass:: projspec.proj.infra.Earthfile +.. autoclass:: projspec.proj.infra.Nixpacks +.. autoclass:: projspec.proj.infra.Vagrant +.. autoclass:: projspec.proj.jsframeworks.NextJS +.. autoclass:: projspec.proj.jsframeworks.NuxtJS +.. autoclass:: projspec.proj.jsframeworks.SvelteKit +.. autoclass:: projspec.proj.jsframeworks.Vite +.. autoclass:: projspec.proj.jsframeworks.Pnpm +.. autoclass:: projspec.proj.jsframeworks.Bun +.. autoclass:: projspec.proj.jsframeworks.Deno .. autoclass:: projspec.proj.node.JLabExtension .. autoclass:: projspec.proj.node.Node .. 
autoclass:: projspec.proj.node.Yarn @@ -109,7 +175,9 @@ User Classes .. autoclass:: projspec.proj.uv.Uv .. autoclass:: projspec.proj.uv.UvScript .. autoclass:: projspec.proj.webapp.Django +.. autoclass:: projspec.proj.webapp.Gradio .. autoclass:: projspec.proj.webapp.Marimo +.. autoclass:: projspec.proj.webapp.Shiny .. autoclass:: projspec.proj.webapp.Streamlit .. autoclass:: projspec.proj.workflows.MLFlow @@ -133,6 +201,9 @@ User Classes ~~~~~~~~~~~~ .. autosummary:: + content.cicd.CIWorkflow + content.cicd.PipelineStage + content.cicd.ServiceDependency content.data.IntakeSource content.data.TabularData content.data.DataResource @@ -145,6 +216,9 @@ User Classes content.package.PythonPackage content.package.RustModule +.. autoclass:: projspec.content.cicd.CIWorkflow +.. autoclass:: projspec.content.cicd.PipelineStage +.. autoclass:: projspec.content.cicd.ServiceDependency .. autoclass:: projspec.content.data.IntakeSource .. autoclass:: projspec.content.data.DataResource .. autoclass:: projspec.content.data.TabularData @@ -182,6 +256,9 @@ User Classes artifact.container.DockerRuntime artifact.deployment.Deployment artifact.deployment.HelmDeployment + artifact.infra.ComposeStack + artifact.infra.StaticSite + artifact.infra.TerraformPlan artifact.installable.CondaPackage artifact.installable.SystemInstallablePackage artifact.installable.Wheel @@ -199,6 +276,9 @@ User Classes :members: .. autoclass:: projspec.artifact.deployment.HelmDeployment :members: +.. autoclass:: projspec.artifact.infra.ComposeStack +.. autoclass:: projspec.artifact.infra.StaticSite +.. autoclass:: projspec.artifact.infra.TerraformPlan .. autoclass:: projspec.artifact.installable.CondaPackage .. autoclass:: projspec.artifact.installable.SystemInstallablePackage .. 
autoclass:: projspec.artifact.installable.Wheel diff --git a/src/projspec/artifact/__init__.py b/src/projspec/artifact/__init__.py index 230edb1..c75695f 100644 --- a/src/projspec/artifact/__init__.py +++ b/src/projspec/artifact/__init__.py @@ -3,6 +3,7 @@ from projspec.artifact.base import BaseArtifact, FileArtifact from projspec.artifact.container import DockerImage from projspec.artifact.deployment import Deployment, HelmDeployment +from projspec.artifact.infra import ComposeStack, StaticSite, TerraformPlan from projspec.artifact.installable import CondaPackage, Wheel from projspec.artifact.linter import PreCommit from projspec.artifact.process import Process @@ -11,9 +12,12 @@ __all__ = [ "BaseArtifact", "FileArtifact", + "ComposeStack", "DockerImage", "Deployment", "HelmDeployment", + "StaticSite", + "TerraformPlan", "CondaPackage", "Wheel", "Process", @@ -21,4 +25,5 @@ "CondaEnv", "VirtualEnv", "LockFile", + "PreCommit", ] diff --git a/src/projspec/content/__init__.py b/src/projspec/content/__init__.py index 9fc87c9..6c38261 100644 --- a/src/projspec/content/__init__.py +++ b/src/projspec/content/__init__.py @@ -1,6 +1,12 @@ """Contents classes - information declared in project specs""" from projspec.content.base import BaseContent +from projspec.content.cicd import ( + CIWorkflow, + GithubAction, + PipelineStage, + ServiceDependency, +) from projspec.content.data import TabularData, IntakeSource from projspec.content.env_var import EnvironmentVariables from projspec.content.environment import Environment, Stack, Precision @@ -12,6 +18,10 @@ __all__ = [ "BaseContent", + "CIWorkflow", + "GithubAction", + "PipelineStage", + "ServiceDependency", "TabularData", "IntakeSource", "EnvironmentVariables", diff --git a/src/projspec/content/cicd.py b/src/projspec/content/cicd.py index 789c655..906fe4c 100644 --- a/src/projspec/content/cicd.py +++ b/src/projspec/content/cicd.py @@ -1,15 +1,49 @@ """Run definitions that are part of code productionalisation""" +from 
dataclasses import dataclass, field + from projspec.content import BaseContent -class GithubAction(BaseContent): - """A run prescription that runs in github on push/merge""" +@dataclass +class CIWorkflow(BaseContent): + """A CI/CD workflow or pipeline definition. + + Captures the name, triggering events, and high-level job/stage names from + CI configuration files (GitHub Actions, GitLab CI, CircleCI, etc.). + """ + + name: str = "" + triggers: list = field(default_factory=list) + jobs: list = field(default_factory=list) + provider: str = "" # e.g. "github", "gitlab", "circleci" + + +# Keep legacy stub under old name for backwards compatibility +GithubAction = CIWorkflow + + +@dataclass +class PipelineStage(BaseContent): + """A named stage or step in a data/ML/workflow pipeline. + + Used by dbt, Snakemake, Prefect, Airflow, Kedro, DVC, etc. + """ + + name: str = "" + cmd: list = field(default_factory=list) + depends_on: list = field(default_factory=list) + - # TODO: we probably want to extract out the jobs and runs, maybe the steps. - # It may be interesting to provide links to the browser or API to view - # details. - ... +@dataclass +class ServiceDependency(BaseContent): + """An external service that a project depends on at runtime. + Extracted from Docker Compose service definitions, Helm values, etc. + Examples: postgres, redis, kafka, elasticsearch. + """ -# TODO: there are many of these, but we don't extract much information from them + name: str = "" + service_type: str = "" # e.g. 
"postgres", "redis", "kafka" + version: str = "" + image: str = "" diff --git a/src/projspec/proj/__init__.py b/src/projspec/proj/__init__.py index 929fb17..a065ec8 100644 --- a/src/projspec/proj/__init__.py +++ b/src/projspec/proj/__init__.py @@ -5,16 +5,54 @@ from projspec.proj.ai import AIEnabled from projspec.proj.backstage import BackstageCatalog from projspec.proj.briefcase import Briefcase +from projspec.proj.cicd import ( + CircleCI, + GitHubActions, + GitLabCI, + JustFile, + Taskfile, + Tox, +) from projspec.proj.conda_package import CondaRecipe, RattlerRecipe from projspec.proj.conda_project import CondaProject from projspec.proj.data_dir import Data from projspec.proj.datapackage import DataPackage, DVCRepo +from projspec.proj.dataworkflows import ( + Airflow, + Dagster, + Dbt, + Kedro, + Nox, + Prefect, + Quarto, + Snakemake, +) +from projspec.proj.docs import Docusaurus, MkDocs, Sphinx from projspec.proj.documentation import RTD, MDBook from projspec.proj.git import GitRepo from projspec.proj.golang import Golang from projspec.proj.helm import HelmChart from projspec.proj.hf import HuggingFaceRepo from projspec.proj.ide import JetbrainsIDE, NvidiaAIWorkbench, VSCode +from projspec.proj.infra import ( + Ansible, + CDK, + DockerCompose, + Earthfile, + Nixpacks, + Pulumi, + Terraform, + Vagrant, +) +from projspec.proj.jsframeworks import ( + Bun, + Deno, + NextJS, + NuxtJS, + Pnpm, + SvelteKit, + Vite, +) from projspec.proj.node import JLabExtension, Node, Yarn from projspec.proj.pixi import Pixi from projspec.proj.poetry import Poetry @@ -23,44 +61,96 @@ from projspec.proj.python_code import PythonCode, PythonLibrary from projspec.proj.rust import Rust, RustPython from projspec.proj.uv import Uv -from projspec.proj.webapp import Django, Marimo, Streamlit +from projspec.proj.webapp import Django, Gradio, Marimo, Shiny, Streamlit from projspec.proj.workflows import MLFlow __all__ = [ "ParseFailed", "Project", "ProjectSpec", - "AIEnabled", - 
"BackstageCatalog", - "Briefcase", - "Cited", - "Zenodo", + # CI/CD + "CircleCI", + "GitHubActions", + "GitLabCI", + "JustFile", + "Taskfile", + "Tox", + # Conda "CondaRecipe", "CondaProject", + # Data "Data", - "Golang", + "DataPackage", + "DVCRepo", + # Data/ML workflows + "Airflow", + "Dagster", + "Dbt", + "Kedro", + "Nox", + "Prefect", + "Quarto", + "Snakemake", + # Documentation + "Docusaurus", + "MkDocs", + "MDBook", + "RTD", + "Sphinx", + # Git "GitRepo", + # Go + "Golang", + # Helm/K8s "HelmChart", + # HuggingFace "HuggingFaceRepo", + # IDE + "AIEnabled", + "BackstageCatalog", + "Briefcase", + "Cited", + "Zenodo", "JetbrainsIDE", - "JLabExtension", - "Marimo", - "MDBook", - "MLFlow", "NvidiaAIWorkbench", + "VSCode", + # Infrastructure + "Ansible", + "CDK", + "DockerCompose", + "Earthfile", + "Nixpacks", + "Pulumi", + "Terraform", + "Vagrant", + # JavaScript frameworks + "Bun", + "Deno", + "NextJS", + "NuxtJS", + "Pnpm", + "SvelteKit", + "Vite", + # Node + "JLabExtension", "Node", - "Poetry", - "RattlerRecipe", + "Yarn", + # Python packaging "Pixi", + "Poetry", "PyScript", "PythonCode", "PythonLibrary", - "RTD", + "Uv", + # Rust "Rust", "RustPython", + # Web apps "Django", + "Gradio", + "Marimo", + "Shiny", "Streamlit", - "Uv", - "VSCode", - "Yarn", + # Workflows + "MLFlow", ] diff --git a/src/projspec/proj/webapp.py b/src/projspec/proj/webapp.py index 6761442..1e72eb3 100644 --- a/src/projspec/proj/webapp.py +++ b/src/projspec/proj/webapp.py @@ -1,7 +1,7 @@ import os from projspec.proj import ProjectSpec, ParseFailed -from projspec.utils import _ipynb_to_py, run_subprocess +from projspec.utils import _ipynb_to_py, run_subprocess, AttrDict # TODO: webapp Servers should (optionally?) call threading.Timer(0.5, webbrowser.open(..)); # but then it must not block, and we need to set/infer the URL including port. 
@@ -58,7 +58,10 @@ class Streamlit(ProjectSpec): spec_doc = "https://docs.streamlit.io/deploy/streamlit-community-cloud/deploy-your-app/file-organization" # see also "https://docs.streamlit.io/develop/api-reference/configuration/config.toml", which is # mainly theme and server config. - server_args = {"port_arg": "--server.address", "address_arg": "--server.port"} + server_args = { + "port_arg": "--server.address", + "address_arg": "--server.port", + } def match(self) -> bool: # more possible layouts @@ -170,7 +173,9 @@ def parse(self) -> None: if has_import and has_app: name = path.rsplit("/", 1)[-1].replace(".py", "") self.artifacts["server"][name] = Server( - proj=self.proj, cmd=["marimo", "run", path], **self.server_args + proj=self.proj, + cmd=["marimo", "run", path], + **self.server_args, ) if not self.artifacts["server"]: @@ -284,7 +289,9 @@ def parse(self) -> None: if has_import and has_app: name = path.rsplit("/", 1)[-1].replace(".py", "") self.artifacts["server"][name] = Server( - proj=self.proj, cmd=["fastapi", "run", path], **self.server_args + proj=self.proj, + cmd=["fastapi", "run", path], + **self.server_args, ) if not self.artifacts["server"]: @@ -423,3 +430,127 @@ def _create(path: str) -> None: pn.panel("Hello World").servable() """ ) + + +class Gradio(ProjectSpec): + """Gradio machine learning demo and web app. + + Detected by scanning Python files for ``import gradio`` or ``gr.Interface`` / ``gr.Blocks``. 
+ """ + + spec_doc = "https://www.gradio.app/docs/gradio/interface" + server_args = {"port_arg": "--server-port", "address_arg": "--server-name"} + + def match(self) -> bool: + return ( + any(fn.endswith(".py") for fn in self.proj.scanned_files) + or "app.py" in self.proj.basenames + ) + + def parse(self) -> None: + from projspec.artifact.process import Server + + servers = {} + for path, content in self.proj.scanned_files.items(): + if not path.endswith(".py"): + continue + content = content.decode() + has_import = "import gradio" in content or "from gradio" in content + has_app = ( + "gr.Interface(" in content + or "gr.Blocks(" in content + or "gradio.Interface(" in content + ) + if has_import and has_app: + name = path.rsplit("/", 1)[-1].replace(".py", "") + servers[name] = Server( + proj=self.proj, + cmd=["python", path], + **self.server_args, + ) + + if not servers: + raise ParseFailed + + self._contents = AttrDict() + self._artifacts = AttrDict(server=servers) + + @staticmethod + def _create(path: str) -> None: + with open(f"{path}/app.py", "wt") as f: + # https://www.gradio.app/guides/quickstart + f.write( + """import gradio as gr + +def greet(name): + return f"Hello, {name}!" + +demo = gr.Interface(fn=greet, inputs="text", outputs="text") + +if __name__ == "__main__": + demo.launch() +""" + ) + + +class Shiny(ProjectSpec): + """Shiny for Python web application. + + Detected by scanning Python files for ``from shiny import`` combined with + ``app = App(`` or ``@app.`` decorator usage. Also detects ``app.py`` at root. 
+ """ + + spec_doc = "https://shiny.posit.co/py/docs/overview.html" + server_args = {"port_arg": "--port", "address_arg": "--host"} + + def match(self) -> bool: + return ( + any(fn.endswith(".py") for fn in self.proj.scanned_files) + or "app.py" in self.proj.basenames + ) + + def parse(self) -> None: + from projspec.artifact.process import Server + + servers = {} + for path, content in self.proj.scanned_files.items(): + if not path.endswith(".py"): + continue + content = content.decode() + has_import = "from shiny" in content or "import shiny" in content + has_app = "App(" in content or "@app." in content or "app_ui" in content + if has_import and has_app: + name = path.rsplit("/", 1)[-1].replace(".py", "") + servers[name] = Server( + proj=self.proj, + cmd=["shiny", "run", path], + **self.server_args, + ) + + if not servers: + raise ParseFailed + + self._contents = AttrDict() + self._artifacts = AttrDict(server=servers) + + @staticmethod + def _create(path: str) -> None: + with open(f"{path}/app.py", "wt") as f: + # https://shiny.posit.co/py/docs/overview.html + f.write( + """from shiny import App, render, ui + +app_ui = ui.page_fluid( + ui.h2("Hello, Shiny!"), + ui.input_text("name", "Enter your name:", value="World"), + ui.output_text_verbatim("greeting"), +) + +def server(input, output, session): + @render.text + def greeting(): + return f"Hello, {input.name()}!" 
+ +app = App(app_ui, server) +""" + ) diff --git a/tests/test_roundtrips.py b/tests/test_roundtrips.py index 48d4985..fa2cc33 100644 --- a/tests/test_roundtrips.py +++ b/tests/test_roundtrips.py @@ -36,6 +36,29 @@ "MDBook", "RTD", "BackstageCatalog", + # CI/CD — file-only _create() + "GitHubActions", + "GitLabCI", + "CircleCI", + "Taskfile", + "JustFile", + "Tox", + # Data / ML workflows — file-only _create() + "Dbt", + "Quarto", + "Nox", + # Documentation — file-only _create() + "MkDocs", + "Sphinx", + # Infrastructure — file-only _create() + "DockerCompose", + "Terraform", + "Ansible", + "Pulumi", + "CDK", + "Earthfile", + "Nixpacks", + "Vagrant", ], ) def test_compliant(tmpdir, cls_name): From f0195dbf6a3552f13cdf141ba5ad9f62b42b057b Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Apr 2026 18:45:47 -0400 Subject: [PATCH 3/9] Add missing --- docs/source/api.rst | 12 +- src/projspec/artifact/infra.py | 71 ++++ src/projspec/proj/__init__.py | 3 +- src/projspec/proj/cicd.py | 441 ++++++++++++++++++++++ src/projspec/proj/dataworkflows.py | 582 +++++++++++++++++++++++++++++ src/projspec/proj/documentation.py | 242 ++++++++++++ src/projspec/proj/infra.py | 574 ++++++++++++++++++++++++++++ src/projspec/proj/jsframeworks.py | 411 ++++++++++++++++++++ 8 files changed, 2328 insertions(+), 8 deletions(-) create mode 100644 src/projspec/artifact/infra.py create mode 100644 src/projspec/proj/cicd.py create mode 100644 src/projspec/proj/dataworkflows.py create mode 100644 src/projspec/proj/infra.py create mode 100644 src/projspec/proj/jsframeworks.py diff --git a/docs/source/api.rst b/docs/source/api.rst index acd304d..d6e4405 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -54,11 +54,11 @@ User Classes proj.dataworkflows.Airflow proj.dataworkflows.Snakemake proj.dataworkflows.Nox - proj.docs.MkDocs - proj.docs.Sphinx - proj.docs.Docusaurus + proj.documentation.Docusaurus proj.documentation.MDBook + proj.documentation.MkDocs proj.documentation.RTD + 
proj.documentation.Sphinx proj.git.GitRepo proj.golang.Golang proj.helm.HelmChart @@ -131,11 +131,11 @@ User Classes .. autoclass:: projspec.proj.dataworkflows.Airflow .. autoclass:: projspec.proj.dataworkflows.Snakemake .. autoclass:: projspec.proj.dataworkflows.Nox -.. autoclass:: projspec.proj.docs.MkDocs -.. autoclass:: projspec.proj.docs.Sphinx -.. autoclass:: projspec.proj.docs.Docusaurus +.. autoclass:: projspec.proj.documentation.Docusaurus .. autoclass:: projspec.proj.documentation.MDBook +.. autoclass:: projspec.proj.documentation.MkDocs .. autoclass:: projspec.proj.documentation.RTD +.. autoclass:: projspec.proj.documentation.Sphinx .. autoclass:: projspec.proj.git.GitRepo .. autoclass:: projspec.proj.golang.Golang .. autoclass:: projspec.proj.helm.HelmChart diff --git a/src/projspec/artifact/infra.py b/src/projspec/artifact/infra.py new file mode 100644 index 0000000..8ffc543 --- /dev/null +++ b/src/projspec/artifact/infra.py @@ -0,0 +1,71 @@ +"""Infrastructure and deployment artifact types.""" + +from projspec.artifact.base import BaseArtifact, FileArtifact +from projspec.proj.base import Project +from projspec.utils import run_subprocess + + +class ComposeStack(BaseArtifact): + """A multi-service stack managed by Docker Compose. + + ``make()`` runs ``docker compose up -d`` + ``clean()`` runs ``docker compose down`` + ``state`` is inferred by ``docker compose ps`` (checks for running services). 
+ """ + + def __init__(self, proj: Project, file: str = "docker-compose.yml", **kwargs): + self.compose_file = file + cmd = ["docker", "compose", "-f", file, "up", "-d"] + super().__init__(proj, cmd=cmd, **kwargs) + + def _make(self, **kwargs): + run_subprocess(self.cmd, cwd=self.proj.url, output=False, **kwargs) + + def clean(self): + run_subprocess( + ["docker", "compose", "-f", self.compose_file, "down"], + cwd=self.proj.url, + output=False, + ) + + def _is_done(self) -> bool: + try: + result = run_subprocess( + ["docker", "compose", "-f", self.compose_file, "ps", "-q"], + cwd=self.proj.url, + ) + return bool(result.stdout.strip()) + except Exception: + return False + + def _is_clean(self) -> bool: + return not self._is_done() + + +class StaticSite(FileArtifact): + """A static website produced by a build tool (MkDocs, Sphinx, Docusaurus, Quarto, etc.). + + ``fn`` should be the glob pattern for the output index file, e.g. + ``/site/index.html``. + """ + + pass + + +class TerraformPlan(FileArtifact): + """A saved Terraform execution plan file (``terraform plan -out plan.tfplan``). 
+ + ``make()`` runs ``terraform plan -out plan.tfplan`` + ``clean()`` deletes the plan file + """ + + def __init__(self, proj: Project, plan_file: str = "plan.tfplan", **kwargs): + fn = f"{proj.url}/{plan_file}" + cmd = ["terraform", "plan", "-out", plan_file] + super().__init__(proj, fn=fn, cmd=cmd, **kwargs) + + def clean(self): + try: + self.proj.fs.rm(self.fn) + except FileNotFoundError: + pass diff --git a/src/projspec/proj/__init__.py b/src/projspec/proj/__init__.py index a065ec8..6c2342a 100644 --- a/src/projspec/proj/__init__.py +++ b/src/projspec/proj/__init__.py @@ -27,8 +27,7 @@ Quarto, Snakemake, ) -from projspec.proj.docs import Docusaurus, MkDocs, Sphinx -from projspec.proj.documentation import RTD, MDBook +from projspec.proj.documentation import RTD, MDBook, MkDocs, Sphinx, Docusaurus from projspec.proj.git import GitRepo from projspec.proj.golang import Golang from projspec.proj.helm import HelmChart diff --git a/src/projspec/proj/cicd.py b/src/projspec/proj/cicd.py new file mode 100644 index 0000000..803f3df --- /dev/null +++ b/src/projspec/proj/cicd.py @@ -0,0 +1,441 @@ +"""CI/CD project specs: GitHub Actions, GitLab CI, CircleCI, Taskfile, JustFile, Tox.""" + +import os + +import yaml + +from projspec.proj.base import ParseFailed, ProjectSpec, ProjectExtra +from projspec.utils import AttrDict + + +class GitHubActions(ProjectExtra): + """GitHub Actions CI/CD workflows defined in .github/workflows/. + + Each YAML file under .github/workflows/ defines one workflow. This spec + is a ``ProjectExtra`` so that its ``CIWorkflow`` content objects are merged + into the root project rather than appearing as a standalone project type. 
+ """ + + spec_doc = "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions" + + def match(self) -> bool: + # Check for the .github/workflows directory + workflows_dir = f"{self.proj.url}/.github/workflows" + try: + entries = self.proj.fs.ls(workflows_dir, detail=False) + return any(e.endswith((".yml", ".yaml")) for e in entries) + except (FileNotFoundError, NotADirectoryError, Exception): + return False + + def parse(self) -> None: + from projspec.content.cicd import CIWorkflow + + workflows_dir = f"{self.proj.url}/.github/workflows" + try: + entries = self.proj.fs.ls(workflows_dir, detail=False) + except Exception as exc: + raise ParseFailed(f"Could not list .github/workflows: {exc}") from exc + + ci_workflows = AttrDict() + for entry in entries: + if not entry.endswith((".yml", ".yaml")): + continue + try: + with self.proj.fs.open(entry, "rt") as f: + wf = yaml.safe_load(f) + except Exception: + continue + if not isinstance(wf, dict): + continue + + name = wf.get( + "name", + os.path.basename(entry).replace(".yml", "").replace(".yaml", ""), + ) + on = wf.get("on", wf.get(True, {})) # 'on' is a YAML boolean alias + triggers = [] + if isinstance(on, dict): + triggers = list(on.keys()) + elif isinstance(on, list): + triggers = on + elif isinstance(on, str): + triggers = [on] + + jobs = list(wf.get("jobs", {}).keys()) + key = name.lower().replace(" ", "_").replace("-", "_") + ci_workflows[key] = CIWorkflow( + proj=self.proj, + name=name, + triggers=[str(t) for t in triggers], + jobs=jobs, + provider="github", + ) + + if not ci_workflows: + raise ParseFailed("No valid GitHub Actions workflows found") + + self._contents = AttrDict(ci_workflow=ci_workflows) + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal GitHub Actions CI workflow.""" + workflows_dir = os.path.join(path, ".github", "workflows") + os.makedirs(workflows_dir, exist_ok=True) + with open(os.path.join(workflows_dir, 
"ci.yml"), "wt") as f: + f.write( + "name: CI\n" + "\n" + "on:\n" + " push:\n" + " branches: [main]\n" + " pull_request:\n" + " branches: [main]\n" + "\n" + "jobs:\n" + " test:\n" + " runs-on: ubuntu-latest\n" + " steps:\n" + " - uses: actions/checkout@v4\n" + " - name: Run tests\n" + " run: echo 'Add your test command here'\n" + ) + + +class GitLabCI(ProjectExtra): + """GitLab CI/CD pipeline defined in .gitlab-ci.yml. + + This spec is a ``ProjectExtra`` so its ``CIWorkflow`` content is merged + into the root project. + """ + + spec_doc = "https://docs.gitlab.com/ci/yaml/" + + def match(self) -> bool: + return ".gitlab-ci.yml" in self.proj.basenames + + def parse(self) -> None: + from projspec.content.cicd import CIWorkflow + + try: + with self.proj.get_file(".gitlab-ci.yml") as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read .gitlab-ci.yml: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(".gitlab-ci.yml did not parse to a mapping") + + stages = cfg.get("stages", []) + # Jobs are any top-level keys that are not reserved keywords + reserved = { + "stages", + "variables", + "include", + "workflow", + "default", + "image", + "services", + "before_script", + "after_script", + "cache", + } + jobs = [k for k in cfg if k not in reserved and not k.startswith(".")] + + self._contents = AttrDict( + ci_workflow=CIWorkflow( + proj=self.proj, + name="GitLab CI", + triggers=stages, + jobs=jobs, + provider="gitlab", + ) + ) + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal .gitlab-ci.yml.""" + with open(os.path.join(path, ".gitlab-ci.yml"), "wt") as f: + f.write( + "stages:\n" + " - test\n" + "\n" + "test:\n" + " stage: test\n" + " script:\n" + " - echo 'Add your test command here'\n" + ) + + +class CircleCI(ProjectExtra): + """CircleCI pipeline defined in .circleci/config.yml. 
class CircleCI(ProjectExtra):
    """CircleCI pipeline defined in .circleci/config.yml.

    This spec is a ``ProjectExtra`` so its ``CIWorkflow`` content is merged
    into the root project.
    """

    spec_doc = "https://circleci.com/docs/configuration-reference/"

    def match(self) -> bool:
        # Guarded: some filesystems raise instead of returning False.
        try:
            return self.proj.fs.isfile(f"{self.proj.url}/.circleci/config.yml")
        except Exception:
            return False

    def parse(self) -> None:
        from projspec.content.cicd import CIWorkflow

        config_path = f"{self.proj.url}/.circleci/config.yml"
        try:
            with self.proj.fs.open(config_path, "rt") as f:
                data = yaml.safe_load(f)
        except Exception as exc:
            raise ParseFailed(f"Could not read .circleci/config.yml: {exc}") from exc

        if not isinstance(data, dict):
            raise ParseFailed(".circleci/config.yml did not parse to a mapping")

        job_names = list(data.get("jobs", {}))
        # "version" is schema metadata, not a workflow name.
        workflow_names = [w for w in data.get("workflows", {}) if w != "version"]

        self._contents = AttrDict(
            ci_workflow=CIWorkflow(
                proj=self.proj,
                name="CircleCI",
                triggers=workflow_names,
                jobs=job_names,
                provider="circleci",
            )
        )
        self._artifacts = AttrDict()

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal CircleCI config."""
        circleci_dir = os.path.join(path, ".circleci")
        os.makedirs(circleci_dir, exist_ok=True)
        with open(os.path.join(circleci_dir, "config.yml"), "wt") as f:
            f.write(
                "version: 2.1\n"
                "\n"
                "jobs:\n"
                "  test:\n"
                "    docker:\n"
                "      - image: cimg/base:stable\n"
                "    steps:\n"
                "      - checkout\n"
                "      - run: echo 'Add your test command here'\n"
                "\n"
                "workflows:\n"
                "  main:\n"
                "    jobs:\n"
                "      - test\n"
            )
+ """ + + spec_doc = "https://taskfile.dev/reference/schema/" + + _NAMES = {"Taskfile.yml", "Taskfile.yaml", "taskfile.yml", "taskfile.yaml"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(f"{fname} did not parse to a mapping") + + tasks = cfg.get("tasks", {}) + cmds = AttrDict() + arts = AttrDict() + for task_name, task_def in tasks.items(): + if not task_name or task_name.startswith("_"): + continue + cmd = ["task", task_name] + cmds[task_name] = Command(proj=self.proj, cmd=cmd) + arts[task_name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) if cmds else AttrDict() + self._artifacts = AttrDict(process=arts) if arts else AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Taskfile.yml.""" + with open(os.path.join(path, "Taskfile.yml"), "wt") as f: + f.write( + "version: '3'\n" + "\n" + "tasks:\n" + " default:\n" + " desc: Default task\n" + " cmds:\n" + " - echo 'Hello from Taskfile!'\n" + "\n" + " test:\n" + " desc: Run tests\n" + " cmds:\n" + " - echo 'Add your test command here'\n" + ) + + +class JustFile(ProjectSpec): + """Task runner using Just (justfile / Justfile). + + A justfile defines named recipes that can be run with ``just ``. 
+ """ + + spec_doc = "https://just.systems/man/en/" + + _NAMES = {"justfile", "Justfile", ".justfile"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + import re + from projspec.artifact.process import Process + from projspec.content.executable import Command + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + text = f.read() + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + # Recipes are lines matching: recipe-name ...: (not starting with #/@/space) + recipe_names = re.findall( + r"^([a-zA-Z_][a-zA-Z0-9_-]*)(?:\s.*)?:", text, re.MULTILINE + ) + + cmds = AttrDict() + arts = AttrDict() + for name in recipe_names: + cmd = ["just", name] + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) if cmds else AttrDict() + self._artifacts = AttrDict(process=arts) if arts else AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal justfile.""" + with open(os.path.join(path, "justfile"), "wt") as f: + f.write( + "# Default recipe\n" + "default:\n" + " echo 'Hello from just!'\n" + "\n" + "# Run tests\n" + "test:\n" + " echo 'Add your test command here'\n" + ) + + +class Tox(ProjectSpec): + """Python test automation using tox. + + Reads ``tox.ini`` or ``tox.toml`` (or ``[tool.tox]`` in ``pyproject.toml``) + and exposes each test environment as a ``Command`` and ``Process``. 
+ """ + + spec_doc = "https://tox.wiki/en/stable/config.html" + + def match(self) -> bool: + if "tox.ini" in self.proj.basenames or "tox.toml" in self.proj.basenames: + return True + return bool(self.proj.pyproject.get("tool", {}).get("tox")) + + def parse(self) -> None: + import configparser + import re + from projspec.artifact.process import Process + from projspec.content.executable import Command + + env_names: list[str] = [] + + if "tox.ini" in self.proj.basenames: + try: + with self.proj.get_file("tox.ini") as f: + text = f.read() + cfg = configparser.ConfigParser() + cfg.read_string(text) + # envlist can be a comma/space/newline separated list with optional braces + envlist_raw = cfg.get("tox", "envlist", fallback="") + if envlist_raw: + # Strip braces notation like {py39,py310}-django + flat = re.sub(r"\{[^}]*\}", "", envlist_raw) + env_names = [ + e.strip() for e in re.split(r"[,\s]+", flat) if e.strip() + ] + # Also pick up [testenv:*] sections + for section in cfg.sections(): + if section.startswith("testenv:"): + name = section[len("testenv:") :] + if name not in env_names: + env_names.append(name) + except Exception as exc: + raise ParseFailed(f"Could not parse tox.ini: {exc}") from exc + + elif "tox.toml" in self.proj.basenames: + try: + import toml + from projspec.utils import PickleableTomlDecoder + + with self.proj.get_file("tox.toml", text=False) as f: + cfg = toml.loads(f.read().decode(), decoder=PickleableTomlDecoder()) + env_names = list(cfg.get("env", {}).keys()) + except Exception as exc: + raise ParseFailed(f"Could not parse tox.toml: {exc}") from exc + + else: + tox_cfg = self.proj.pyproject.get("tool", {}).get("tox", {}) + env_names = list(tox_cfg.get("env", {}).keys()) + + cmds = AttrDict() + arts = AttrDict() + if not env_names: + # At minimum expose a generic tox run + cmds["tox"] = Command(proj=self.proj, cmd=["tox"]) + arts["tox"] = Process(proj=self.proj, cmd=["tox"]) + else: + for name in env_names: + cmd = ["tox", "-e", name] + 
cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal tox.ini.""" + with open(os.path.join(path, "tox.ini"), "wt") as f: + f.write( + "[tox]\n" + "envlist = py311\n" + "\n" + "[testenv]\n" + "deps = pytest\n" + "commands = pytest {posargs}\n" + ) diff --git a/src/projspec/proj/dataworkflows.py b/src/projspec/proj/dataworkflows.py new file mode 100644 index 0000000..425d716 --- /dev/null +++ b/src/projspec/proj/dataworkflows.py @@ -0,0 +1,582 @@ +"""Data/ML workflow specs: dbt, Quarto, Prefect, Dagster, Kedro, Airflow, Snakemake, Nox.""" + +import os +import re + +import yaml + +from projspec.proj.base import ParseFailed, ProjectSpec +from projspec.utils import AttrDict + + +class Dbt(ProjectSpec): + """dbt (data build tool) project. + + Detected by ``dbt_project.yml`` at the project root. 
+ """ + + spec_doc = "https://docs.getdbt.com/reference/dbt_project.yml" + + def match(self) -> bool: + return "dbt_project.yml" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + try: + with self.proj.get_file("dbt_project.yml") as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read dbt_project.yml: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed("dbt_project.yml did not parse to a mapping") + + meta: dict[str, str] = {} + for key in ("name", "version", "profile"): + if val := cfg.get(key): + meta[key] = str(val) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + # Standard dbt commands + dbt_cmds = { + "run": ["dbt", "run"], + "test": ["dbt", "test"], + "build": ["dbt", "build"], + "compile": ["dbt", "compile"], + "docs_generate": ["dbt", "docs", "generate"], + "docs_serve": ["dbt", "docs", "serve"], + "seed": ["dbt", "seed"], + "snapshot": ["dbt", "snapshot"], + "source_freshness": ["dbt", "source", "freshness"], + } + + cmds = AttrDict() + arts = AttrDict() + for name, cmd in dbt_cmds.items(): + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + conts["command"] = cmds + self._contents = conts + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal dbt project.""" + name = os.path.basename(path) + with open(os.path.join(path, "dbt_project.yml"), "wt") as f: + f.write( + f"name: '{name}'\n" + "version: '1.0.0'\n" + "config-version: 2\n" + "\n" + "profile: 'default'\n" + "\n" + "model-paths: ['models']\n" + "seed-paths: ['seeds']\n" + "test-paths: ['tests']\n" + "snapshot-paths: ['snapshots']\n" + "\n" + "models:\n" + f" {name}:\n" + " +materialized: view\n" + 
class Quarto(ProjectSpec):
    """Quarto publishing system project.

    Detected by ``_quarto.yml`` / ``_quarto.yaml`` or any ``.qmd`` file at the root.
    """

    spec_doc = "https://quarto.org/docs/reference/projects/core.html"

    def match(self) -> bool:
        names = self.proj.basenames
        if "_quarto.yml" in names or "_quarto.yaml" in names:
            return True
        return any(n.endswith(".qmd") for n in names)

    def parse(self) -> None:
        from projspec.artifact.infra import StaticSite
        from projspec.artifact.process import Server
        from projspec.content.metadata import DescriptiveMetadata

        cfg: dict = {}
        for fname in ("_quarto.yml", "_quarto.yaml"):
            if fname not in self.proj.basenames:
                continue
            try:
                with self.proj.get_file(fname) as f:
                    cfg = yaml.safe_load(f) or {}
            except Exception:
                pass  # best effort: a broken config still counts as Quarto
            break

        project = cfg.get("project", {})
        book = cfg.get("book", {})
        meta: dict[str, str] = {}
        # Book metadata intentionally wins over project metadata for "title".
        for section, keys in ((project, ("title", "type")), (book, ("title", "author"))):
            for key in keys:
                if val := section.get(key):
                    meta[key] = str(val)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        output_dir = project.get("output-dir", "_site")
        self._contents = conts
        self._artifacts = AttrDict(
            render=StaticSite(
                proj=self.proj,
                cmd=["quarto", "render"],
                fn=f"{self.proj.url}/{output_dir}/index.html",
            ),
            preview=Server(proj=self.proj, cmd=["quarto", "preview"]),
        )

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Quarto project."""
        name = os.path.basename(path)
        with open(os.path.join(path, "_quarto.yml"), "wt") as f:
            f.write(
                "project:\n"
                "  type: website\n"
                "\n"
                "website:\n"
                f'  title: "{name}"\n'
                "  navbar:\n"
                "    left:\n"
                "      - href: index.qmd\n"
                "        text: Home\n"
                "\n"
                "format:\n"
                "  html:\n"
                "    theme: cosmo\n"
            )
        with open(os.path.join(path, "index.qmd"), "wt") as f:
            f.write(
                "---\n"
                f'title: "{name}"\n'
                "---\n"
                "\n"
                "Welcome to this Quarto project.\n"
            )
navbar:\n" + " left:\n" + " - href: index.qmd\n" + " text: Home\n" + "\n" + "format:\n" + " html:\n" + " theme: cosmo\n" + ) + with open(os.path.join(path, "index.qmd"), "wt") as f: + f.write( + "---\n" + f'title: "{name}"\n' + "---\n" + "\n" + "Welcome to this Quarto project.\n" + ) + + +class Prefect(ProjectSpec): + """Prefect workflow orchestration project. + + Detected by ``prefect.yaml`` at the project root. + """ + + spec_doc = "https://docs.prefect.io/v3/deploy/infrastructure-concepts/prefect-yaml" + + def match(self) -> bool: + return "prefect.yaml" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.cicd import PipelineStage + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + try: + with self.proj.get_file("prefect.yaml") as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read prefect.yaml: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed("prefect.yaml did not parse to a mapping") + + meta: dict[str, str] = {} + if name := cfg.get("name"): + meta["name"] = str(name) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + # Deployments become pipeline stages + deployments = cfg.get("deployments", []) + stages = AttrDict() + arts = AttrDict() + cmds = AttrDict() + for dep in deployments: + if not isinstance(dep, dict): + continue + dep_name = dep.get("name", "default") + entrypoint = dep.get("entrypoint", "") + stages[dep_name] = PipelineStage( + proj=self.proj, + name=dep_name, + cmd=["prefect", "deployment", "run", dep_name], + ) + deploy_cmd = ["prefect", "deploy", "--name", dep_name] + cmds[dep_name] = Command(proj=self.proj, cmd=deploy_cmd) + arts[dep_name] = Process(proj=self.proj, cmd=deploy_cmd) + + if stages: + conts["pipeline_stage"] = stages + if cmds: + conts["command"] = cmds + + # 
class Dagster(ProjectSpec):
    """Dagster data orchestration project.

    Detected by ``pyproject.toml`` with ``[tool.dagster]`` section,
    or ``dagster.yaml`` / ``workspace.yaml`` at the project root.
    """

    spec_doc = "https://docs.dagster.io/api/python-api/workspace"

    def match(self) -> bool:
        if self.proj.pyproject.get("tool", {}).get("dagster"):
            return True
        return bool(
            {"dagster.yaml", "workspace.yaml"}.intersection(self.proj.basenames)
        )

    def parse(self) -> None:
        from projspec.artifact.process import Process, Server
        from projspec.content.executable import Command
        from projspec.content.metadata import DescriptiveMetadata

        conts = AttrDict()
        dagster_cfg = self.proj.pyproject.get("tool", {}).get("dagster", {})
        if isinstance(dagster_cfg, dict) and (module := dagster_cfg.get("module_name")):
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta={"module": str(module)}
            )

        # Core commands (the local mapping was previously misnamed "dbt_cmds",
        # a copy-paste from the Dbt spec).
        conts["command"] = AttrDict(
            dev=Command(proj=self.proj, cmd=["dagster", "dev"]),
            materialize=Command(
                proj=self.proj,
                cmd=["dagster", "asset", "materialize", "--select", "*"],
            ),
        )

        self._contents = conts
        # "dev" runs a long-lived web UI, hence a Server artifact.
        self._artifacts = AttrDict(
            dev=Server(proj=self.proj, cmd=["dagster", "dev"]),
            materialize=Process(
                proj=self.proj,
                cmd=["dagster", "asset", "materialize", "--select", "*"],
            ),
        )
+ """ + + spec_doc = "https://docs.kedro.org/en/stable/kedro_project_setup/settings.html" + + def match(self) -> bool: + return bool(self.proj.pyproject.get("tool", {}).get("kedro")) + + def parse(self) -> None: + from projspec.artifact.process import Process, Server + from projspec.content.cicd import PipelineStage + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + kedro_cfg = self.proj.pyproject.get("tool", {}).get("kedro", {}) + + meta: dict[str, str] = {} + for key in ("package_name", "project_name", "kedro_init_version"): + if val := kedro_cfg.get(key): + meta[key] = str(val) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + # Look for pipeline definitions in src//pipelines/ + package_name = kedro_cfg.get("package_name", "") + pipeline_names: list[str] = [] + if package_name: + pipelines_dir = f"{self.proj.url}/src/{package_name}/pipelines" + try: + entries = self.proj.fs.ls(pipelines_dir, detail=False) + pipeline_names = [ + os.path.basename(e) + for e in entries + if self.proj.fs.isdir(e) and not os.path.basename(e).startswith("_") + ] + except Exception: + pass + + cmds = AttrDict() + arts = AttrDict() + stages = AttrDict() + + # Default pipeline + cmds["run"] = Command(proj=self.proj, cmd=["kedro", "run"]) + arts["run"] = Process(proj=self.proj, cmd=["kedro", "run"]) + + for pipeline in pipeline_names: + cmd = ["kedro", "run", "--pipeline", pipeline] + cmds[pipeline] = Command(proj=self.proj, cmd=cmd) + arts[pipeline] = Process(proj=self.proj, cmd=cmd) + stages[pipeline] = PipelineStage(proj=self.proj, name=pipeline, cmd=cmd) + + arts["viz"] = Server(proj=self.proj, cmd=["kedro", "viz", "run"]) + + if stages: + conts["pipeline_stage"] = stages + conts["command"] = cmds + self._contents = conts + self._artifacts = arts + + +class Airflow(ProjectSpec): + """Apache Airflow workflow orchestration project. 
class Airflow(ProjectSpec):
    """Apache Airflow workflow orchestration project.

    Detected by a ``dags/`` directory at the project root containing Python files.
    """

    spec_doc = (
        "https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html"
    )

    def match(self) -> bool:
        dags_dir = f"{self.proj.url}/dags"
        try:
            if not self.proj.fs.isdir(dags_dir):
                return False
            entries = self.proj.fs.ls(dags_dir, detail=False)
            return any(e.endswith(".py") for e in entries)
        except Exception:
            return False

    def parse(self) -> None:
        from projspec.artifact.process import Process, Server
        from projspec.content.cicd import PipelineStage
        from projspec.content.executable import Command

        dags_dir = f"{self.proj.url}/dags"
        try:
            entries = self.proj.fs.ls(dags_dir, detail=False)
        except Exception as exc:
            raise ParseFailed(f"Could not list dags/: {exc}") from exc

        stages = AttrDict()
        for entry in entries:
            if not entry.endswith(".py"):
                continue
            dag_name = os.path.basename(entry).replace(".py", "")
            if dag_name.startswith("_"):
                continue
            # Try to extract dag_id from file content
            try:
                with self.proj.fs.open(entry, "rt") as f:
                    content = f.read()
                dag_ids = re.findall(r'dag_id\s*=\s*["\']([^"\']+)["\']', content)
            except Exception:
                dag_ids = []
            # Fix: a file that read fine but declared its DAG without a
            # literal dag_id= kwarg previously produced no stage at all
            # (the fallback only fired on read failure); now the module
            # name is used as fallback in both cases.
            if not dag_ids:
                dag_ids = [dag_name]
            for dag_id in dag_ids:
                stages[dag_id] = PipelineStage(
                    proj=self.proj,
                    name=dag_id,
                    cmd=["airflow", "dags", "trigger", dag_id],
                )

        cmds = AttrDict(
            standalone=Command(proj=self.proj, cmd=["airflow", "standalone"]),
            scheduler=Command(proj=self.proj, cmd=["airflow", "scheduler"]),
            webserver=Command(proj=self.proj, cmd=["airflow", "webserver"]),
        )
        arts = AttrDict(
            standalone=Process(proj=self.proj, cmd=["airflow", "standalone"]),
            webserver=Server(
                proj=self.proj, cmd=["airflow", "webserver", "--port", "8080"]
            ),
        )

        conts = AttrDict(command=cmds)
        if stages:
            conts["pipeline_stage"] = stages

        self._contents = conts
        self._artifacts = arts
class Snakemake(ProjectSpec):
    """Snakemake workflow management system project.

    Detected by a ``Snakefile`` or ``workflow/Snakefile`` at the project root.
    """

    spec_doc = (
        "https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html"
    )

    def match(self) -> bool:
        if "Snakefile" in self.proj.basenames:
            return True
        # also detect workflow/Snakefile layout
        try:
            return self.proj.fs.isfile(f"{self.proj.url}/workflow/Snakefile")
        except Exception:
            return False

    def parse(self) -> None:
        from projspec.artifact.process import Process
        from projspec.content.cicd import PipelineStage
        from projspec.content.executable import Command

        # Determine snakefile path
        if "Snakefile" in self.proj.basenames:
            snakefile_path = "Snakefile"
        else:
            snakefile_path = "workflow/Snakefile"

        # Parse rule names (optional — the generic run is exposed regardless).
        rule_names: list[str] = []
        try:
            with self.proj.get_file(snakefile_path) as f:
                content = f.read()
            rule_names = re.findall(r"^rule\s+(\w+)\s*:", content, re.MULTILINE)
        except Exception:
            pass

        cmds = AttrDict()
        arts = AttrDict()
        stages = AttrDict()

        # Generic run command
        run_cmd = ["snakemake", "--cores", "all"]
        cmds["run"] = Command(proj=self.proj, cmd=run_cmd)
        arts["run"] = Process(proj=self.proj, cmd=run_cmd)

        for rule in rule_names:
            # "all"/"clean" are covered by the generic run / housekeeping.
            if rule in ("all", "clean"):
                continue
            cmd = ["snakemake", rule, "--cores", "all"]
            cmds[rule] = Command(proj=self.proj, cmd=cmd)
            # Fix: per-rule Commands previously had no matching Process
            # artifact, unlike every sibling spec (Tox, Nox, Taskfile, ...).
            arts[rule] = Process(proj=self.proj, cmd=cmd)
            stages[rule] = PipelineStage(proj=self.proj, name=rule, cmd=cmd)

        if stages:
            self._contents = AttrDict(command=cmds, pipeline_stage=stages)
        else:
            self._contents = AttrDict(command=cmds)
        self._artifacts = AttrDict(process=arts)
+ """ + + spec_doc = "https://nox.thea.codes/en/stable/config.html" + + def match(self) -> bool: + return "noxfile.py" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + + # Discover session names via regex on noxfile.py + session_names: list[str] = [] + try: + with self.proj.get_file("noxfile.py") as f: + content = f.read() + # Sessions are decorated functions: @nox.session or @session + session_names = re.findall( + r"@(?:nox\.)?session(?:\([^)]*\))?\s+def\s+(\w+)", + content, + re.MULTILINE, + ) + except Exception: + pass + + cmds = AttrDict() + arts = AttrDict() + + if not session_names: + cmds["nox"] = Command(proj=self.proj, cmd=["nox"]) + arts["nox"] = Process(proj=self.proj, cmd=["nox"]) + else: + for name in session_names: + cmd = ["nox", "-s", name] + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal noxfile.py.""" + with open(os.path.join(path, "noxfile.py"), "wt") as f: + f.write( + "import nox\n" + "\n" + "\n" + "@nox.session\n" + "def tests(session):\n" + ' """Run the test suite."""\n' + " session.install('pytest')\n" + " session.run('pytest')\n" + "\n" + "\n" + "@nox.session\n" + "def lint(session):\n" + ' """Lint the code."""\n' + " session.install('ruff')\n" + " session.run('ruff', 'check', '.')\n" + ) diff --git a/src/projspec/proj/documentation.py b/src/projspec/proj/documentation.py index e6fdeb1..c9fc7fb 100644 --- a/src/projspec/proj/documentation.py +++ b/src/projspec/proj/documentation.py @@ -2,6 +2,7 @@ import re import toml +import yaml from projspec.proj import ProjectSpec from projspec.proj.base import ParseFailed @@ -259,3 +260,244 @@ def _create(path: str) -> None: # docs/requirements.txt — build dependencies with 
open(f"{path}/docs/requirements.txt", "wt") as f: f.write("sphinx\n") + + +class MkDocs(ProjectSpec): + """MkDocs documentation project. + + Detected by ``mkdocs.yml`` or ``mkdocs.yaml`` at the project root, when + not already covered by the ReadTheDocs spec. + """ + + spec_doc = "https://www.mkdocs.org/user-guide/configuration/" + + _NAMES = {"mkdocs.yml", "mkdocs.yaml"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + from projspec.content.metadata import DescriptiveMetadata + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + cfg = cfg or {} + meta: dict[str, str] = {} + for key in ("site_name", "site_description", "site_author", "repo_url"): + if val := cfg.get(key): + meta[key] = str(val) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + site_dir = cfg.get("site_dir", "site") + + arts = AttrDict( + docs=StaticSite( + proj=self.proj, + cmd=["mkdocs", "build"], + fn=f"{self.proj.url}/{site_dir}/index.html", + ), + serve=Server( + proj=self.proj, + cmd=["mkdocs", "serve"], + ), + ) + + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal MkDocs project.""" + name = os.path.basename(path) + with open(os.path.join(path, "mkdocs.yml"), "wt") as f: + f.write( + f"site_name: {name}\n" + "\n" + "nav:\n" + " - Home: index.md\n" + "\n" + "theme:\n" + " name: material\n" + ) + docs_dir = os.path.join(path, "docs") + os.makedirs(docs_dir, exist_ok=True) + with open(os.path.join(docs_dir, "index.md"), "wt") as f: + f.write(f"# {name}\n\nWelcome to the documentation.\n") + + +class 
class Sphinx(ProjectSpec):
    """Sphinx documentation project (standalone, without ReadTheDocs config).

    Detected by ``conf.py`` in the project root or in a ``docs/`` subdirectory.
    """

    spec_doc = "https://www.sphinx-doc.org/en/master/usage/configuration.html"

    def match(self) -> bool:
        if "conf.py" in self.proj.basenames:
            return True
        try:
            return self.proj.fs.isfile(f"{self.proj.url}/docs/conf.py")
        except Exception:
            return False

    def parse(self) -> None:
        from projspec.artifact.infra import StaticSite
        from projspec.artifact.process import Server
        from projspec.content.metadata import DescriptiveMetadata

        # Locate conf.py: root layout vs the conventional docs/ layout.
        if "conf.py" in self.proj.basenames:
            conf_path = self.proj.basenames["conf.py"]
            docs_dir = self.proj.url
        else:
            conf_path = f"{self.proj.url}/docs/conf.py"
            docs_dir = f"{self.proj.url}/docs"

        # Best-effort scrape of simple string assignments from conf.py.
        try:
            with self.proj.fs.open(conf_path, "rt") as f:
                source = f.read()
        except Exception:
            source = ""
        meta: dict[str, str] = {}
        for var in ("project", "author", "release", "version"):
            found = re.search(
                rf'^{var}\s*=\s*["\']([^"\']+)["\']', source, re.MULTILINE
            )
            if found:
                meta[var] = found.group(1)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        build_dir = f"{docs_dir}/_build/html"
        self._contents = conts
        self._artifacts = AttrDict(
            docs=StaticSite(
                proj=self.proj,
                cmd=["sphinx-build", "-b", "html", docs_dir, build_dir],
                fn=f"{build_dir}/index.html",
            ),
            autobuild=Server(
                proj=self.proj,
                cmd=["sphinx-autobuild", docs_dir, build_dir],
            ),
        )

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Sphinx docs project."""
        name = os.path.basename(path)
        docs_dir = os.path.join(path, "docs")
        os.makedirs(docs_dir, exist_ok=True)

        with open(os.path.join(docs_dir, "conf.py"), "wt") as f:
            f.write(
                f'project = "{name}"\n'
                "extensions = []\n"
                'html_theme = "alabaster"\n'
            )
        with open(os.path.join(docs_dir, "index.rst"), "wt") as f:
            f.write(f"{name}\n{'=' * len(name)}\n\n.. toctree::\n   :maxdepth: 2\n")
        with open(os.path.join(docs_dir, "requirements.txt"), "wt") as f:
            f.write("sphinx\n")
class Docusaurus(ProjectSpec):
    """Docusaurus documentation/website project.

    Detected by ``docusaurus.config.js``, ``docusaurus.config.ts``, or
    ``docusaurus.config.mjs`` at the project root.
    """

    spec_doc = "https://docusaurus.io/docs/configuration"

    _CONFIG_NAMES = {
        "docusaurus.config.js",
        "docusaurus.config.ts",
        "docusaurus.config.mjs",
    }

    def match(self) -> bool:
        return not self._CONFIG_NAMES.isdisjoint(self.proj.basenames)

    def parse(self) -> None:
        from projspec.artifact.infra import StaticSite
        from projspec.artifact.process import Server
        from projspec.content.metadata import DescriptiveMetadata

        fname = next(n for n in self._CONFIG_NAMES if n in self.proj.basenames)

        # Best-effort scrape of simple scalar fields from the JS/TS config.
        try:
            with self.proj.get_file(fname) as f:
                source = f.read()
        except Exception:
            source = ""
        meta: dict[str, str] = {}
        for key in ("title", "tagline", "url", "organizationName", "projectName"):
            found = re.search(rf'{key}\s*:\s*["\']([^"\']+)["\']', source)
            if found:
                meta[key] = found.group(1)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        # Prefer yarn when a yarn lockfile is present.
        runner = "yarn" if "yarn.lock" in self.proj.basenames else "npm"
        self._contents = conts
        self._artifacts = AttrDict(
            build=StaticSite(
                proj=self.proj,
                cmd=[runner, "run", "build"],
                fn=f"{self.proj.url}/build/index.html",
            ),
            start=Server(proj=self.proj, cmd=[runner, "run", "start"]),
        )

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Docusaurus project via npx."""
        from projspec.utils import run_subprocess

        name = os.path.basename(path)
        run_subprocess(
            [
                "npx",
                "create-docusaurus@latest",
                name,
                "classic",
                "--skip-install",
            ],
            cwd=os.path.dirname(path) or ".",
            output=False,
        )
class DockerCompose(ProjectSpec):
    """Docker Compose multi-service project.

    Detected by ``docker-compose.yml``, ``docker-compose.yaml``, or ``compose.yaml``
    at the project root.
    """

    spec_doc = "https://docs.docker.com/reference/compose-file/"

    _NAMES = {
        "docker-compose.yml",
        "docker-compose.yaml",
        "compose.yml",
        "compose.yaml",
    }

    def match(self) -> bool:
        return bool(self._NAMES.intersection(self.proj.basenames))

    @staticmethod
    def _split_image(image: str) -> tuple[str, str]:
        """Split an image reference into (repository, tag).

        Fix: the previous naive ``image.split(":")`` mis-parsed references
        with a registry port — "registry.example.com:5000/app" yielded
        version "5000/app" and service type "registry.example.com".  A
        trailing ":<tag>" only counts as a tag when it contains no "/";
        any "@sha256:..." digest suffix is dropped first.
        """
        repo = image.split("@", 1)[0]
        tag = ""
        if ":" in repo:
            head, candidate = repo.rsplit(":", 1)
            if "/" not in candidate:
                repo, tag = head, candidate
        return repo, tag

    def parse(self) -> None:
        from projspec.artifact.infra import ComposeStack
        from projspec.content.cicd import ServiceDependency
        from projspec.content.metadata import DescriptiveMetadata

        fname = next(n for n in self._NAMES if n in self.proj.basenames)
        try:
            with self.proj.get_file(fname) as f:
                cfg = yaml.safe_load(f)
        except Exception as exc:
            raise ParseFailed(f"Could not read {fname}: {exc}") from exc

        if not isinstance(cfg, dict):
            raise ParseFailed(f"{fname} did not parse to a mapping")

        services = cfg.get("services", {})
        service_deps = AttrDict()
        for svc_name, svc_cfg in services.items():
            if not isinstance(svc_cfg, dict):
                continue
            image = svc_cfg.get("image", "")
            repo, tag = self._split_image(image)
            # Guess service type from the image repository name.
            svc_type = repo.split("/")[-1] if image else svc_name
            service_deps[svc_name] = ServiceDependency(
                proj=self.proj,
                name=svc_name,
                service_type=svc_type,
                version=tag,
                image=image,
            )

        conts = AttrDict()
        if service_deps:
            conts["service_dependency"] = service_deps

        meta: dict[str, str] = {}
        if "name" in cfg:
            meta["name"] = str(cfg["name"])
        if services:
            meta["services"] = ", ".join(services.keys())
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        self._contents = conts
        self._artifacts = AttrDict(stack=ComposeStack(proj=self.proj, file=fname))

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal docker-compose.yml."""
        with open(os.path.join(path, "docker-compose.yml"), "wt") as f:
            f.write(
                "services:\n"
                "  app:\n"
                "    image: alpine:latest\n"
                "    command: echo 'Hello from Docker Compose!'\n"
            )
class Terraform(ProjectSpec):
    """Terraform infrastructure-as-code project.

    Detected by any ``.tf`` file at the project root.
    """

    spec_doc = "https://developer.hashicorp.com/terraform/language"

    def match(self) -> bool:
        return any(n.endswith(".tf") for n in self.proj.basenames)

    def parse(self) -> None:
        from projspec.artifact.infra import TerraformPlan
        from projspec.artifact.process import Process
        from projspec.content.executable import Command
        from projspec.content.metadata import DescriptiveMetadata

        # Scrape resource types and provider names from every root .tf file.
        resource_types: set[str] = set()
        providers: set[str] = set()
        for basename, full_path in self.proj.basenames.items():
            if not basename.endswith(".tf"):
                continue
            try:
                with self.proj.fs.open(full_path, "rt") as f:
                    hcl = f.read()
            except Exception:
                continue  # unreadable file: skip, best effort
            resource_types.update(
                re.findall(r'^resource\s+"([^"]+)"', hcl, re.MULTILINE)
            )
            providers.update(
                re.findall(r'source\s*=\s*"[^/]+/([^"]+)"', hcl, re.MULTILINE)
            )

        meta: dict[str, str] = {}
        if providers:
            meta["providers"] = ", ".join(sorted(providers))
        if resource_types:
            meta["resource_types"] = ", ".join(sorted(resource_types))

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        cmds = AttrDict()
        arts = AttrDict()
        for label, argv in (
            ("init", ["terraform", "init"]),
            ("validate", ["terraform", "validate"]),
            ("apply", ["terraform", "apply", "-auto-approve"]),
            ("destroy", ["terraform", "destroy", "-auto-approve"]),
            ("output", ["terraform", "output"]),
        ):
            cmds[label] = Command(proj=self.proj, cmd=argv)
            arts[label] = Process(proj=self.proj, cmd=argv)

        arts["plan"] = TerraformPlan(proj=self.proj)

        conts["command"] = cmds
        self._contents = conts
        self._artifacts = arts

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Terraform project."""
        with open(os.path.join(path, "main.tf"), "wt") as f:
            f.write(
                "terraform {\n"
                '  required_version = ">= 1.0"\n'
                "}\n"
                "\n"
                "# Add your resources here\n"
                '# resource "aws_instance" "example" {\n'
                '#   ami           = "ami-0c55b159cbfafe1f0"\n'
                '#   instance_type = "t2.micro"\n'
                "# }\n"
            )
        with open(os.path.join(path, "variables.tf"), "wt") as f:
            f.write(
                "# Define input variables here\n"
                '# variable "region" {\n'
                '#   default = "us-east-1"\n'
                "# }\n"
            )
        with open(os.path.join(path, "outputs.tf"), "wt") as f:
            f.write("# Define outputs here\n")
+ """ + + spec_doc = "https://docs.ansible.com/ansible/latest/reference_appendices/playbooks_keywords.html" + + _PLAYBOOK_NAMES = {"playbook.yml", "playbook.yaml", "site.yml", "site.yaml"} + + def match(self) -> bool: + if "ansible.cfg" in self.proj.basenames: + return True + if bool(self._PLAYBOOK_NAMES.intersection(self.proj.basenames)): + return True + # roles/ directory alongside a YAML file + if self.proj.fs.isdir(f"{self.proj.url}/roles"): + return any(n.endswith((".yml", ".yaml")) for n in self.proj.basenames) + return False + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + + # Find playbook files + playbook_files = [ + n + for n in self.proj.basenames + if n.endswith((".yml", ".yaml")) + and n not in {"requirements.yml", "galaxy.yml"} + ] + + cmds = AttrDict() + arts = AttrDict() + + for pb in playbook_files: + name = pb.replace(".yml", "").replace(".yaml", "") + cmd = ["ansible-playbook", pb] + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + if not cmds: + cmds["run"] = Command(proj=self.proj, cmd=["ansible-playbook", "site.yml"]) + arts["run"] = Process(proj=self.proj, cmd=["ansible-playbook", "site.yml"]) + + self._contents = AttrDict(command=cmds) + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Ansible project.""" + with open(os.path.join(path, "playbook.yml"), "wt") as f: + f.write( + "---\n" + "- name: Example playbook\n" + " hosts: localhost\n" + " gather_facts: false\n" + " tasks:\n" + " - name: Print hello\n" + " ansible.builtin.debug:\n" + " msg: 'Hello from Ansible!'\n" + ) + with open(os.path.join(path, "inventory"), "wt") as f: + f.write("localhost ansible_connection=local\n") + + +class Pulumi(ProjectSpec): + """Pulumi infrastructure-as-code project. + + Detected by ``Pulumi.yaml`` or ``Pulumi.yml`` at the project root. 
+ """ + + spec_doc = "https://www.pulumi.com/docs/reference/pulumi-yaml/" + + _NAMES = {"Pulumi.yaml", "Pulumi.yml"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.deployment import Deployment + from projspec.artifact.process import Process + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(f"{fname} did not parse to a mapping") + + meta: dict[str, str] = {} + for key in ("name", "description", "runtime"): + if val := cfg.get(key): + meta[key] = ( + str(val) if not isinstance(val, dict) else str(val.get("name", val)) + ) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + stack_name = cfg.get("name", "dev") + cmds = AttrDict( + up=Command(proj=self.proj, cmd=["pulumi", "up", "--yes"]), + destroy=Command(proj=self.proj, cmd=["pulumi", "destroy", "--yes"]), + preview=Command(proj=self.proj, cmd=["pulumi", "preview"]), + ) + arts = AttrDict( + deploy=Deployment( + proj=self.proj, + cmd=["pulumi", "up", "--yes"], + release=stack_name, + clean_cmd=["pulumi", "destroy", "--yes"], + ), + preview=Process(proj=self.proj, cmd=["pulumi", "preview"]), + ) + + conts["command"] = cmds + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Pulumi YAML project.""" + name = os.path.basename(path) + with open(os.path.join(path, "Pulumi.yaml"), "wt") as f: + f.write( + f"name: {name}\n" + "runtime: yaml\n" + "description: A Pulumi YAML project\n" + "\n" + "resources: {}\n" + ) + + +class CDK(ProjectSpec): + 
"""AWS Cloud Development Kit (CDK) project. + + Detected by ``cdk.json`` at the project root. + """ + + spec_doc = "https://docs.aws.amazon.com/cdk/v2/guide/projects.html" + + def match(self) -> bool: + return "cdk.json" in self.proj.basenames + + def parse(self) -> None: + import json + from projspec.artifact.deployment import Deployment + from projspec.artifact.process import Process + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + try: + with self.proj.get_file("cdk.json") as f: + cfg = json.loads(f.read()) + except Exception as exc: + raise ParseFailed(f"Could not read cdk.json: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed("cdk.json did not parse to a mapping") + + app_cmd = cfg.get("app", "") + conts = AttrDict() + if app_cmd: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta={"app": app_cmd} + ) + + cmds = AttrDict( + synth=Command(proj=self.proj, cmd=["cdk", "synth"]), + deploy=Command(proj=self.proj, cmd=["cdk", "deploy", "--all"]), + destroy=Command(proj=self.proj, cmd=["cdk", "destroy", "--all"]), + diff=Command(proj=self.proj, cmd=["cdk", "diff"]), + ) + arts = AttrDict( + deploy=Deployment( + proj=self.proj, + cmd=["cdk", "deploy", "--all", "--require-approval", "never"], + release="cdk", + clean_cmd=["cdk", "destroy", "--all", "--force"], + ), + diff=Process(proj=self.proj, cmd=["cdk", "diff"]), + synth=Process(proj=self.proj, cmd=["cdk", "synth"]), + ) + + conts["command"] = cmds + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal CDK project.""" + with open(os.path.join(path, "cdk.json"), "wt") as f: + f.write('{\n "app": "npx ts-node --prefer-ts-exts bin/app.ts"\n}\n') + + +class Earthfile(ProjectSpec): + """Earthly build project. + + Detected by ``Earthfile`` at the project root. 
+ """ + + spec_doc = "https://docs.earthly.dev/docs/earthfile" + + def match(self) -> bool: + return "Earthfile" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + + # Parse targets from Earthfile + target_names: list[str] = [] + try: + with self.proj.get_file("Earthfile") as f: + content = f.read() + target_names = re.findall( + r"^([a-zA-Z][a-zA-Z0-9_-]*):", content, re.MULTILINE + ) + except Exception: + pass + + cmds = AttrDict() + arts = AttrDict() + + for target in target_names: + if target.upper() == target: + # All-caps are typically Earthly VERSION/ARG/etc directives, skip + continue + cmd = ["earthly", f"+{target}"] + cmds[target] = Command(proj=self.proj, cmd=cmd) + arts[target] = Process(proj=self.proj, cmd=cmd) + + if not cmds: + cmds["build"] = Command(proj=self.proj, cmd=["earthly", "+build"]) + arts["build"] = Process(proj=self.proj, cmd=["earthly", "+build"]) + + self._contents = AttrDict(command=cmds) + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Earthfile.""" + with open(os.path.join(path, "Earthfile"), "wt") as f: + f.write( + "VERSION 0.8\n" + "\n" + "build:\n" + " FROM alpine:latest\n" + " RUN echo 'Hello from Earthly!'\n" + "\n" + "test:\n" + " FROM +build\n" + " RUN echo 'Tests passed!'\n" + ) + + +class Nixpacks(ProjectSpec): + """Nixpacks build configuration project. + + Detected by ``nixpacks.toml`` at the project root. 
+ """ + + spec_doc = "https://nixpacks.com/docs/configuration/file" + + def match(self) -> bool: + return "nixpacks.toml" in self.proj.basenames + + def parse(self) -> None: + import toml + from projspec.artifact.process import Process + from projspec.content.metadata import DescriptiveMetadata + from projspec.utils import PickleableTomlDecoder + + try: + with self.proj.get_file("nixpacks.toml", text=False) as f: + cfg = toml.loads(f.read().decode(), decoder=PickleableTomlDecoder()) + except Exception as exc: + raise ParseFailed(f"Could not read nixpacks.toml: {exc}") from exc + + meta: dict[str, str] = {} + phases = cfg.get("phases", {}) + if phases: + meta["phases"] = ", ".join(phases.keys()) + start = cfg.get("start", {}) + if start_cmd := start.get("cmd"): + meta["start_cmd"] = str(start_cmd) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + name = os.path.basename(self.proj.url).lower() + arts = AttrDict( + build=Process( + proj=self.proj, cmd=["nixpacks", "build", ".", "--name", name] + ), + ) + + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal nixpacks.toml.""" + with open(os.path.join(path, "nixpacks.toml"), "wt") as f: + f.write( + "[phases.setup]\n" + "nixPkgs = ['python311']\n" + "\n" + "[phases.install]\n" + "cmds = ['pip install -r requirements.txt']\n" + "\n" + "[start]\n" + "cmd = 'python app.py'\n" + ) + + +class Vagrant(ProjectSpec): + """Vagrant virtual machine project. + + Detected by ``Vagrantfile`` at the project root. 
+ """ + + spec_doc = "https://developer.hashicorp.com/vagrant/docs/vagrantfile" + + def match(self) -> bool: + return "Vagrantfile" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process, Server + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + # Extract box name from Vagrantfile via simple regex + meta: dict[str, str] = {} + try: + with self.proj.get_file("Vagrantfile") as f: + content = f.read() + boxes = re.findall(r'config\.vm\.box\s*=\s*["\']([^"\']+)["\']', content) + if boxes: + meta["box"] = boxes[0] + hostname_match = re.search( + r'config\.vm\.hostname\s*=\s*["\']([^"\']+)["\']', content + ) + if hostname_match: + meta["hostname"] = hostname_match.group(1) + except Exception: + pass + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + cmds = AttrDict( + up=Command(proj=self.proj, cmd=["vagrant", "up"]), + halt=Command(proj=self.proj, cmd=["vagrant", "halt"]), + destroy=Command(proj=self.proj, cmd=["vagrant", "destroy", "-f"]), + ssh=Command(proj=self.proj, cmd=["vagrant", "ssh"]), + ) + arts = AttrDict( + vm=Server(proj=self.proj, cmd=["vagrant", "up"]), + ) + + conts["command"] = cmds + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Vagrantfile.""" + with open(os.path.join(path, "Vagrantfile"), "wt") as f: + f.write( + 'Vagrant.configure("2") do |config|\n' + ' config.vm.box = "ubuntu/jammy64"\n' + ' config.vm.provider "virtualbox" do |vb|\n' + ' vb.memory = "1024"\n' + " end\n" + "end\n" + ) diff --git a/src/projspec/proj/jsframeworks.py b/src/projspec/proj/jsframeworks.py new file mode 100644 index 0000000..f464b7a --- /dev/null +++ b/src/projspec/proj/jsframeworks.py @@ -0,0 +1,411 @@ +"""JavaScript/Node.js framework specs: Next.js, Nuxt.js, SvelteKit, Vite, Deno, Bun, pnpm.""" + +import os + +from 
projspec.proj.base import ParseFailed, ProjectSpec +from projspec.proj.node import Node +from projspec.utils import AttrDict, run_subprocess + + +# --------------------------------------------------------------------------- +# Framework specs built on top of Node +# --------------------------------------------------------------------------- + + +class NextJS(Node): + """Next.js React framework project. + + Detected by the presence of ``next.config.js``, ``next.config.mjs``, or + ``next.config.ts`` at the project root. + """ + + spec_doc = "https://nextjs.org/docs/app/api-reference/config/next-config-js" + + _CONFIG_NAMES = { + "next.config.js", + "next.config.mjs", + "next.config.ts", + "next.config.cjs", + } + + def match(self) -> bool: + return bool(self._CONFIG_NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.base import FileArtifact + from projspec.artifact.process import Server + + super().parse0() + + pkg_mgr = self._pkg_manager() + # Development server + self._artifacts["dev"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "dev"], + ) + # Production build + self._artifacts["build"] = FileArtifact( + proj=self.proj, + cmd=[pkg_mgr, "run", "build"], + fn=f"{self.proj.url}/.next/BUILD_ID", + ) + # Production start + self._artifacts["start"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "start"], + ) + + def _pkg_manager(self) -> str: + if "yarn.lock" in self.proj.basenames: + return "yarn" + if "pnpm-lock.yaml" in self.proj.basenames: + return "pnpm" + if "bun.lock" in self.proj.basenames or "bun.lockb" in self.proj.basenames: + return "bun" + return "npm" + + @staticmethod + def _create(path: str) -> None: + run_subprocess( + ["npx", "create-next-app@latest", path, "--yes"], + cwd=os.path.dirname(path) or ".", + output=False, + ) + + +class NuxtJS(Node): + """Nuxt.js Vue framework project. + + Detected by ``nuxt.config.js``, ``nuxt.config.ts``, or ``nuxt.config.mjs``. 
+ """ + + spec_doc = "https://nuxt.com/docs/api/nuxt-config" + + _CONFIG_NAMES = {"nuxt.config.js", "nuxt.config.ts", "nuxt.config.mjs"} + + def match(self) -> bool: + return bool(self._CONFIG_NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.base import FileArtifact + from projspec.artifact.process import Server + + super().parse0() + + pkg_mgr = self._pkg_manager() + self._artifacts["dev"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "dev"], + ) + self._artifacts["build"] = FileArtifact( + proj=self.proj, + cmd=[pkg_mgr, "run", "build"], + fn=f"{self.proj.url}/.nuxt/tsconfig.json", + ) + self._artifacts["generate"] = FileArtifact( + proj=self.proj, + cmd=[pkg_mgr, "run", "generate"], + fn=f"{self.proj.url}/.output/public/index.html", + ) + + def _pkg_manager(self) -> str: + if "yarn.lock" in self.proj.basenames: + return "yarn" + if "pnpm-lock.yaml" in self.proj.basenames: + return "pnpm" + return "npm" + + @staticmethod + def _create(path: str) -> None: + run_subprocess( + ["npx", "nuxi@latest", "init", path], + cwd=os.path.dirname(path) or ".", + output=False, + ) + + +class SvelteKit(Node): + """SvelteKit project. + + Detected by ``svelte.config.js`` or ``svelte.config.ts``. 
+ """ + + spec_doc = "https://svelte.dev/docs/kit/configuration" + + _CONFIG_NAMES = {"svelte.config.js", "svelte.config.ts"} + + def match(self) -> bool: + return bool(self._CONFIG_NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.base import FileArtifact + from projspec.artifact.process import Server + + super().parse0() + + pkg_mgr = self._pkg_manager() + self._artifacts["dev"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "dev"], + ) + self._artifacts["build"] = FileArtifact( + proj=self.proj, + cmd=[pkg_mgr, "run", "build"], + fn=f"{self.proj.url}/.svelte-kit/output/client/index.html", + ) + self._artifacts["preview"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "preview"], + ) + + def _pkg_manager(self) -> str: + if "yarn.lock" in self.proj.basenames: + return "yarn" + if "pnpm-lock.yaml" in self.proj.basenames: + return "pnpm" + if "bun.lock" in self.proj.basenames or "bun.lockb" in self.proj.basenames: + return "bun" + return "npm" + + @staticmethod + def _create(path: str) -> None: + run_subprocess( + ["npm", "create", "svelte@latest", path], + cwd=os.path.dirname(path) or ".", + output=False, + ) + + +class Vite(Node): + """Vite-based project (any frontend framework using Vite as the build tool). + + Detected by ``vite.config.js``, ``vite.config.ts``, ``vite.config.mjs``, + or ``vite.config.cjs``. Note: SvelteKit also has a svelte.config, so + SvelteKit takes priority via its more-specific match. 
+ """ + + spec_doc = "https://vitejs.dev/config/" + + _CONFIG_NAMES = { + "vite.config.js", + "vite.config.ts", + "vite.config.mjs", + "vite.config.cjs", + "vite.config.mts", + } + + def match(self) -> bool: + return bool(self._CONFIG_NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + super().parse0() + + pkg_mgr = self._pkg_manager() + self._artifacts["dev"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "dev"], + ) + self._artifacts["build"] = StaticSite( + proj=self.proj, + cmd=[pkg_mgr, "run", "build"], + fn=f"{self.proj.url}/dist/index.html", + ) + self._artifacts["preview"] = Server( + proj=self.proj, + cmd=[pkg_mgr, "run", "preview"], + ) + + def _pkg_manager(self) -> str: + if "yarn.lock" in self.proj.basenames: + return "yarn" + if "pnpm-lock.yaml" in self.proj.basenames: + return "pnpm" + if "bun.lock" in self.proj.basenames or "bun.lockb" in self.proj.basenames: + return "bun" + return "npm" + + @staticmethod + def _create(path: str) -> None: + run_subprocess( + [ + "npm", + "create", + "vite@latest", + path, + "--", + "--template", + "vanilla", + ], + cwd=os.path.dirname(path) or ".", + output=False, + ) + + +# --------------------------------------------------------------------------- +# Alternative Node package managers as standalone specs +# --------------------------------------------------------------------------- + + +class Pnpm(Node): + """Node project managed with pnpm. + + Detected by ``pnpm-lock.yaml`` at the project root. 
+ """ + + spec_doc = "https://pnpm.io/package_json" + + def match(self) -> bool: + return "pnpm-lock.yaml" in self.proj.basenames + + def parse(self) -> None: + from projspec.content.environment import Environment, Stack, Precision + from projspec.artifact.python_env import LockFile + + super().parse0() + + try: + with self.proj.fs.open(f"{self.proj.url}/pnpm-lock.yaml", "rt") as f: + import yaml as _yaml + + lock = _yaml.safe_load(f) + except Exception: + lock = {} + + self._artifacts["lock_file"] = LockFile( + proj=self.proj, + cmd=["pnpm", "install"], + fn=self.proj.basenames["pnpm-lock.yaml"], + ) + + if isinstance(lock, dict): + pkgs = list(lock.get("packages", {}).keys()) + if pkgs: + self._contents.setdefault("environment", AttrDict())[ + "pnpm_lock" + ] = Environment( + proj=self.proj, + stack=Stack.NPM, + packages=pkgs, + precision=Precision.LOCK, + ) + + @staticmethod + def _create(path: str) -> None: + run_subprocess(["pnpm", "init"], cwd=path, output=False) + + +class Bun(Node): + """Node project managed with Bun. + + Detected by ``bun.lock`` or the legacy ``bun.lockb`` at the project root. + """ + + spec_doc = "https://bun.sh/docs/install/lockfile" + + def match(self) -> bool: + return bool({"bun.lock", "bun.lockb"}.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.python_env import LockFile + + super().parse0() + + lock_name = "bun.lock" if "bun.lock" in self.proj.basenames else "bun.lockb" + self._artifacts["lock_file"] = LockFile( + proj=self.proj, + cmd=["bun", "install"], + fn=self.proj.basenames[lock_name], + ) + + @staticmethod + def _create(path: str) -> None: + run_subprocess(["bun", "init", "-y"], cwd=path, output=False) + + +# --------------------------------------------------------------------------- +# Deno — a separate runtime, not Node +# --------------------------------------------------------------------------- + + +class Deno(ProjectSpec): + """Deno project. 
+ + Detected by ``deno.json`` or ``deno.jsonc`` at the project root. + """ + + spec_doc = "https://docs.deno.com/runtime/fundamentals/configuration/" + + _CONFIG_NAMES = {"deno.json", "deno.jsonc"} + + def match(self) -> bool: + return bool(self._CONFIG_NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + import json + from projspec.artifact.base import FileArtifact + from projspec.artifact.process import Process, Server + from projspec.artifact.python_env import LockFile + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + fname = next(n for n in self._CONFIG_NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + cfg = json.loads(f.read()) + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(f"{fname} did not parse to a mapping") + + meta: dict[str, str] = {} + for key in ("name", "version", "description"): + if val := cfg.get(key): + meta[key] = str(val) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + arts = AttrDict() + tasks = cfg.get("tasks", {}) + cmds = AttrDict() + for task_name, task_cmd in tasks.items(): + cmd_list = ["deno", "task", task_name] + cmds[task_name] = Command(proj=self.proj, cmd=cmd_list) + arts[task_name] = Process(proj=self.proj, cmd=cmd_list) + + if cmds: + conts["command"] = cmds + + # Lock file + if "deno.lock" in self.proj.basenames: + arts["lock_file"] = LockFile( + proj=self.proj, + cmd=["deno", "cache", "--reload", "mod.ts"], + fn=self.proj.basenames["deno.lock"], + ) + + # Main entry point + main = cfg.get("main") or cfg.get("exports") + if main and isinstance(main, str): + arts["run"] = Process( + proj=self.proj, + cmd=["deno", "run", "--allow-all", main], + ) + + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> 
None: + run_subprocess( + ["deno", "init", path], + cwd=os.path.dirname(path) or ".", + output=False, + ) From 6d150305a982a9660d11f9a0deabb096ea515016 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 17 Apr 2026 10:02:51 -0400 Subject: [PATCH 4/9] metaflow --- docs/source/api.rst | 12 ++- src/projspec/proj/__init__.py | 2 + src/projspec/proj/dataworkflows.py | 143 +++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 5 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index d6e4405..26b156b 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -49,11 +49,12 @@ User Classes proj.dataworkflows.Dbt proj.dataworkflows.Quarto proj.dataworkflows.Prefect - proj.dataworkflows.Dagster - proj.dataworkflows.Kedro - proj.dataworkflows.Airflow - proj.dataworkflows.Snakemake - proj.dataworkflows.Nox + proj.dataworkflows.Dagster + proj.dataworkflows.Kedro + proj.dataworkflows.Metaflow + proj.dataworkflows.Airflow + proj.dataworkflows.Snakemake + proj.dataworkflows.Nox proj.documentation.Docusaurus proj.documentation.MDBook proj.documentation.MkDocs @@ -128,6 +129,7 @@ User Classes .. autoclass:: projspec.proj.dataworkflows.Prefect .. autoclass:: projspec.proj.dataworkflows.Dagster .. autoclass:: projspec.proj.dataworkflows.Kedro +.. autoclass:: projspec.proj.dataworkflows.Metaflow .. autoclass:: projspec.proj.dataworkflows.Airflow .. autoclass:: projspec.proj.dataworkflows.Snakemake .. 
autoclass:: projspec.proj.dataworkflows.Nox diff --git a/src/projspec/proj/__init__.py b/src/projspec/proj/__init__.py index 6c2342a..4885046 100644 --- a/src/projspec/proj/__init__.py +++ b/src/projspec/proj/__init__.py @@ -22,6 +22,7 @@ Dagster, Dbt, Kedro, + Metaflow, Nox, Prefect, Quarto, @@ -86,6 +87,7 @@ "Dagster", "Dbt", "Kedro", + "Metaflow", "Nox", "Prefect", "Quarto", diff --git a/src/projspec/proj/dataworkflows.py b/src/projspec/proj/dataworkflows.py index 425d716..cbedfc7 100644 --- a/src/projspec/proj/dataworkflows.py +++ b/src/projspec/proj/dataworkflows.py @@ -580,3 +580,146 @@ def _create(path: str) -> None: " session.install('ruff')\n" " session.run('ruff', 'check', '.')\n" ) + + +class Metaflow(ProjectSpec): + """Metaflow ML/data science workflow project. + + Metaflow has no project-level config file; detection is done by scanning + Python files for ``from metaflow import`` (or ``import metaflow``) combined + with a ``FlowSpec`` subclass definition. + + Each ``.py`` file containing a flow becomes a separate ``Command`` / + ``Process`` pair keyed by the file stem. If a ``@project(name=...)`` + decorator is found, the project name is captured in metadata. If + ``@schedule`` or ``@trigger`` decorators are present, deployment commands + for Argo Workflows and AWS Step Functions are added alongside the local + ``run`` command. 
+ """ + + spec_doc = "https://docs.metaflow.org" + + _IMPORT_RE = re.compile(r"from\s+metaflow\s+import|import\s+metaflow") + _FLOW_RE = re.compile(r"class\s+(\w+)\s*\(\s*\w*FlowSpec\s*\)") + _PROJECT_RE = re.compile(r'@project\s*\(\s*name\s*=\s*["\']([^"\']+)["\']') + _STEP_RE = re.compile(r"@step\s+def\s+(\w+)\s*\(") + _DEPLOY_RE = re.compile(r"@schedule|@trigger|@trigger_on_finish|@project") + + def match(self) -> bool: + for path, content in self.proj.scanned_files.items(): + if not path.endswith(".py"): + continue + try: + src = content.decode() + except Exception: + continue + if self._IMPORT_RE.search(src) and self._FLOW_RE.search(src): + return True + return False + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.cicd import PipelineStage + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + cmds = AttrDict() + arts = AttrDict() + stages = AttrDict() + project_names: list[str] = [] + + for full_path, content in self.proj.scanned_files.items(): + if not full_path.endswith(".py"): + continue + try: + src = content.decode() + except Exception: + continue + + if not (self._IMPORT_RE.search(src) and self._FLOW_RE.search(src)): + continue + + # Relative path for use in commands + rel = full_path.replace(self.proj.url, "").lstrip("/") + stem = os.path.basename(rel).replace(".py", "") + + # Flow class name and @project name + flow_match = self._FLOW_RE.search(src) + flow_class = flow_match.group(1) if flow_match else stem + + proj_match = self._PROJECT_RE.search(src) + if proj_match: + project_names.append(proj_match.group(1)) + + # Step names → pipeline stages + step_names = self._STEP_RE.findall(src) + for step in step_names: + stage_key = f"{stem}.{step}" + stages[stage_key] = PipelineStage( + proj=self.proj, + name=step, + cmd=["python", rel, "run", f"--start", step], + ) + + # Local run command + run_cmd = ["python", rel, "run"] + cmds[stem] = 
Command(proj=self.proj, cmd=run_cmd) + arts[stem] = Process(proj=self.proj, cmd=run_cmd) + + # Deployment commands — only when scheduling/trigger decorators present + if self._DEPLOY_RE.search(src): + arts[f"{stem}.argo_create"] = Process( + proj=self.proj, + cmd=["python", rel, "argo-workflows", "create"], + ) + arts[f"{stem}.step_functions_create"] = Process( + proj=self.proj, + cmd=["python", rel, "step-functions", "create"], + ) + + if not cmds: + raise ParseFailed("No Metaflow flows found in scanned files") + + conts = AttrDict() + meta: dict[str, str] = {} + if project_names: + meta["project"] = ", ".join(sorted(set(project_names))) + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + if stages: + conts["pipeline_stage"] = stages + conts["command"] = cmds + + self._contents = conts + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Metaflow project with a single HelloFlow.""" + name = os.path.basename(path).replace("-", "_").replace(" ", "_") + flow_name = "".join(part.title() for part in name.split("_")) + "Flow" + with open(os.path.join(path, "flow.py"), "wt") as f: + f.write( + "from metaflow import FlowSpec, step\n" + "\n" + "\n" + f"class {flow_name}(FlowSpec):\n" + f' """{flow_name} — generated by projspec."""\n' + "\n" + " @step\n" + " def start(self):\n" + ' """Entry point."""\n' + " print('Starting flow')\n" + " self.next(self.end)\n" + "\n" + " @step\n" + " def end(self):\n" + ' """Final step."""\n' + " print('Flow complete')\n" + "\n" + "\n" + "if __name__ == '__main__':\n" + f" {flow_name}()\n" + ) From 04350f74931285cfadfd7fcfeebd576a88e1d503 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 17 Apr 2026 13:14:10 -0400 Subject: [PATCH 5/9] Some cleanup and add Panel to API doc --- docs/source/api.rst | 7 +- src/projspec/content/cicd.py | 8 +- src/projspec/proj/__init__.py | 5 +- src/projspec/proj/cicd.py | 25 ++--- 
src/projspec/proj/datapackage.py | 2 +- src/projspec/proj/dataworkflows.py | 143 ++++++++++++++++++++++------- src/projspec/proj/documentation.py | 17 +--- src/projspec/proj/infra.py | 40 ++------ src/projspec/proj/jsframeworks.py | 46 ++-------- src/projspec/proj/workflows.py | 91 ------------------ 10 files changed, 145 insertions(+), 239 deletions(-) delete mode 100644 src/projspec/proj/workflows.py diff --git a/docs/source/api.rst b/docs/source/api.rst index 26b156b..0555d43 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -52,6 +52,7 @@ User Classes proj.dataworkflows.Dagster proj.dataworkflows.Kedro proj.dataworkflows.Metaflow + proj.dataworkflows.MLFlow proj.dataworkflows.Airflow proj.dataworkflows.Snakemake proj.dataworkflows.Nox @@ -101,9 +102,10 @@ User Classes proj.webapp.Django proj.webapp.Gradio proj.webapp.Marimo + proj.webapp.Panel proj.webapp.Shiny proj.webapp.Streamlit - proj.workflows.MLFlow + proj.dataworkflows.MLFlow .. autoclass:: projspec.artifact.container.Docker @@ -179,9 +181,10 @@ User Classes .. autoclass:: projspec.proj.webapp.Django .. autoclass:: projspec.proj.webapp.Gradio .. autoclass:: projspec.proj.webapp.Marimo +.. autoclass:: projspec.proj.webapp.Panel .. autoclass:: projspec.proj.webapp.Shiny .. autoclass:: projspec.proj.webapp.Streamlit -.. autoclass:: projspec.proj.workflows.MLFlow +.. autoclass:: projspec.proj.dataworkflows.MLFlow Contents diff --git a/src/projspec/content/cicd.py b/src/projspec/content/cicd.py index 906fe4c..596e808 100644 --- a/src/projspec/content/cicd.py +++ b/src/projspec/content/cicd.py @@ -25,10 +25,7 @@ class CIWorkflow(BaseContent): @dataclass class PipelineStage(BaseContent): - """A named stage or step in a data/ML/workflow pipeline. - - Used by dbt, Snakemake, Prefect, Airflow, Kedro, DVC, etc. 
- """ + """A named stage or step in a data/ML/workflow pipeline.""" name: str = "" cmd: list = field(default_factory=list) @@ -39,8 +36,7 @@ class PipelineStage(BaseContent): class ServiceDependency(BaseContent): """An external service that a project depends on at runtime. - Extracted from Docker Compose service definitions, Helm values, etc. - Examples: postgres, redis, kafka, elasticsearch. + Typically exposed via an open TCP port, e.g., as used in container orchestration. """ name: str = "" diff --git a/src/projspec/proj/__init__.py b/src/projspec/proj/__init__.py index 4885046..94560da 100644 --- a/src/projspec/proj/__init__.py +++ b/src/projspec/proj/__init__.py @@ -23,6 +23,7 @@ Dbt, Kedro, Metaflow, + MLFlow, Nox, Prefect, Quarto, @@ -62,7 +63,6 @@ from projspec.proj.rust import Rust, RustPython from projspec.proj.uv import Uv from projspec.proj.webapp import Django, Gradio, Marimo, Shiny, Streamlit -from projspec.proj.workflows import MLFlow __all__ = [ "ParseFailed", @@ -88,6 +88,7 @@ "Dbt", "Kedro", "Metaflow", + "MLFlow", "Nox", "Prefect", "Quarto", @@ -152,6 +153,4 @@ "Marimo", "Shiny", "Streamlit", - # Workflows - "MLFlow", ] diff --git a/src/projspec/proj/cicd.py b/src/projspec/proj/cicd.py index 803f3df..8ddbb49 100644 --- a/src/projspec/proj/cicd.py +++ b/src/projspec/proj/cicd.py @@ -9,11 +9,9 @@ class GitHubActions(ProjectExtra): - """GitHub Actions CI/CD workflows defined in .github/workflows/. + """GitHub Actions CI/CD workflows - Each YAML file under .github/workflows/ defines one workflow. This spec - is a ``ProjectExtra`` so that its ``CIWorkflow`` content objects are merged - into the root project rather than appearing as a standalone project type. + Each YAML file under .github/workflows/ defines one workflow. 
""" spec_doc = "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions" @@ -103,11 +101,7 @@ def _create(path: str) -> None: class GitLabCI(ProjectExtra): - """GitLab CI/CD pipeline defined in .gitlab-ci.yml. - - This spec is a ``ProjectExtra`` so its ``CIWorkflow`` content is merged - into the root project. - """ + """GitLab CI/CD pipeline defined in .gitlab-ci.yml.""" spec_doc = "https://docs.gitlab.com/ci/yaml/" @@ -169,11 +163,7 @@ def _create(path: str) -> None: class CircleCI(ProjectExtra): - """CircleCI pipeline defined in .circleci/config.yml. - - This spec is a ``ProjectExtra`` so its ``CIWorkflow`` content is merged - into the root project. - """ + """CircleCI pipeline defined in .circleci/config.yml.""" spec_doc = "https://circleci.com/docs/configuration-reference/" @@ -240,7 +230,7 @@ class Taskfile(ProjectSpec): """Task runner using Taskfile (go-task). Taskfile.yml (or Taskfile.yaml / taskfile.yml) defines named tasks that - can be run with ``task ``. + can be run with `task `. """ spec_doc = "https://taskfile.dev/reference/schema/" @@ -300,7 +290,7 @@ def _create(path: str) -> None: class JustFile(ProjectSpec): """Task runner using Just (justfile / Justfile). - A justfile defines named recipes that can be run with ``just ``. + A justfile defines named recipes that can be run with `just `. """ spec_doc = "https://just.systems/man/en/" @@ -355,8 +345,7 @@ def _create(path: str) -> None: class Tox(ProjectSpec): """Python test automation using tox. - Reads ``tox.ini`` or ``tox.toml`` (or ``[tool.tox]`` in ``pyproject.toml``) - and exposes each test environment as a ``Command`` and ``Process``. + A set of environments and run commands to be run as a workflow. 
""" spec_doc = "https://tox.wiki/en/stable/config.html" diff --git a/src/projspec/proj/datapackage.py b/src/projspec/proj/datapackage.py index ffba4ac..2191a16 100644 --- a/src/projspec/proj/datapackage.py +++ b/src/projspec/proj/datapackage.py @@ -4,7 +4,7 @@ class DataPackage(ProjectSpec): - # by frictionless data + """A FrictionlessData datapackage spec""" spec_doc = "https://datapackage.org/standard/data-package/#structure" # e.g., as exported by zenodo diff --git a/src/projspec/proj/dataworkflows.py b/src/projspec/proj/dataworkflows.py index cbedfc7..58d9443 100644 --- a/src/projspec/proj/dataworkflows.py +++ b/src/projspec/proj/dataworkflows.py @@ -1,4 +1,4 @@ -"""Data/ML workflow specs: dbt, Quarto, Prefect, Dagster, Kedro, Airflow, Snakemake, Nox.""" +"""Data/ML workflow specs: dbt, Quarto, Prefect, Dagster, Kedro, Airflow, Snakemake, Nox, Metaflow, MLFlow.""" import os import re @@ -12,7 +12,10 @@ class Dbt(ProjectSpec): """dbt (data build tool) project. - Detected by ``dbt_project.yml`` at the project root. + dbt is used for data ingestion, validation, and transform. + + The spec dbt about the context of your project and how to transform your data + (build your data sets). """ spec_doc = "https://docs.getdbt.com/reference/dbt_project.yml" @@ -95,10 +98,7 @@ def _create(path: str) -> None: class Quarto(ProjectSpec): - """Quarto publishing system project. - - Detected by ``_quarto.yml`` / ``_quarto.yaml`` or any ``.qmd`` file at the root. - """ + """Quarto publishing system project.""" spec_doc = "https://quarto.org/docs/reference/projects/core.html" @@ -188,10 +188,7 @@ def _create(path: str) -> None: class Prefect(ProjectSpec): - """Prefect workflow orchestration project. - - Detected by ``prefect.yaml`` at the project root. 
- """ + """Prefect workflow orchestration project.""" spec_doc = "https://docs.prefect.io/v3/deploy/infrastructure-concepts/prefect-yaml" @@ -255,11 +252,7 @@ def parse(self) -> None: class Dagster(ProjectSpec): - """Dagster data orchestration project. - - Detected by ``pyproject.toml`` with ``[tool.dagster]`` section, - or ``dagster.yaml`` / ``workspace.yaml`` at the project root. - """ + """Dagster data orchestration project.""" spec_doc = "https://docs.dagster.io/api/python-api/workspace" @@ -309,10 +302,7 @@ def parse(self) -> None: class Kedro(ProjectSpec): - """Kedro data science pipeline project. - - Detected by ``pyproject.toml`` with ``[tool.kedro]`` section. - """ + """Kedro data science pipeline project.""" spec_doc = "https://docs.kedro.org/en/stable/kedro_project_setup/settings.html" @@ -377,10 +367,7 @@ def parse(self) -> None: class Airflow(ProjectSpec): - """Apache Airflow workflow orchestration project. - - Detected by a ``dags/`` directory at the project root containing Python files. - """ + """Apache Airflow workflow orchestration project/DAG spec.""" spec_doc = ( "https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html" @@ -453,10 +440,7 @@ def parse(self) -> None: class Snakemake(ProjectSpec): - """Snakemake workflow management system project. - - Detected by a ``Snakefile`` or ``workflow/Snakefile`` at the project root. - """ + """Snakemake workflow management system project.""" spec_doc = ( "https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html" @@ -518,7 +502,9 @@ def parse(self) -> None: class Nox(ProjectSpec): """Nox Python automation project. - Detected by ``noxfile.py`` at the project root. + Often used for testing, linting, and packaging. Nox is a Python + environment management tool that allows you to define multiple + CI runs in one execution. 
""" spec_doc = "https://nox.thea.codes/en/stable/config.html" @@ -586,15 +572,18 @@ class Metaflow(ProjectSpec): """Metaflow ML/data science workflow project. Metaflow has no project-level config file; detection is done by scanning - Python files for ``from metaflow import`` (or ``import metaflow``) combined - with a ``FlowSpec`` subclass definition. + Python files for `from metaflow import` (or `import metaflow`) combined + with a `FlowSpec` subclass definition. - Each ``.py`` file containing a flow becomes a separate ``Command`` / - ``Process`` pair keyed by the file stem. If a ``@project(name=...)`` + Each `.py` file containing a flow becomes a separate `Command` / + `Process` pair keyed by the file stem. If a `@project(name=...)` decorator is found, the project name is captured in metadata. If - ``@schedule`` or ``@trigger`` decorators are present, deployment commands + `@schedule` or `@trigger` decorators are present, deployment commands for Argo Workflows and AWS Step Functions are added alongside the local - ``run`` command. + `run` command. + + No explicit parsing of Config files, since they are designed as + defaults, and often overridden (and hard to detect). """ spec_doc = "https://docs.metaflow.org" @@ -723,3 +712,89 @@ def _create(path: str) -> None: "if __name__ == '__main__':\n" f" {flow_name}()\n" ) + + +class MLFlow(ProjectSpec): + """MLflow project, defined by an `MLproject` (or `MLFlow`) file. + + An MLproject file is a YAML document that declares the project name, + the environment (conda or pip), and one or more named entry points. 
+ """ + + spec_doc = ( + "https://mlflow.org/docs/latest/ml/projects/#mlproject-file-configuration" + ) + + def match(self) -> bool: + return "MLproject" in self.proj.basenames or "MLFlow" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.environment import Environment, Precision, Stack + from projspec.content.executable import Command + + fname = "MLproject" if "MLproject" in self.proj.basenames else "MLFlow" + with self.proj.fs.open(self.proj.basenames[fname], "rt") as f: + meta = yaml.safe_load(f) + + if "python_env" in meta: + with self.proj.get_file(meta["python_env"], text=True) as f: + env = yaml.safe_load(f) + self._contents["environment"] = Environment( + stack=Stack.PIP, + precision=Precision.SPEC, + packages=env.get("dependencies", []) + + [f"python {env.get('python', '')}"], + proj=self.proj, + ) + elif "conda_env" in meta: + with self.proj.get_file(meta["conda_env"], text=True) as f: + env = yaml.safe_load(f) + self._contents["environment"] = Environment( + stack=Stack.CONDA, + precision=Precision.SPEC, + packages=env.get("dependencies", []), + channels=env.get("channels"), + proj=self.proj, + ) + + cmds = AttrDict() + arts = AttrDict() + for name, ep in meta.get("entry_points", {}).items(): + cmds[name] = Command(proj=self.proj, cmd=ep["command"]) + arts[name] = Process(proj=self.proj, cmd=["mlflow", "run", ".", "-e", name]) + + if cmds: + self._contents["command"] = cmds + if arts: + self._artifacts = AttrDict(process=arts) + if self._contents is None: + self._contents = AttrDict() + if self._artifacts is None: + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + with open(f"{path}/MLproject", "w") as f: + f.write( + "name: tutorial\n" + "\n" + "conda_env: conda.yaml\n" + "\n" + "entry_points:\n" + " main:\n" + " parameters:\n" + " alpha: {type: float, default: 0.5}\n" + " l1_ratio: {type: float, default: 0.1}\n" + ' command: "python train.py 
{alpha} {l1_ratio}"\n' + ) + with open(f"{path}/conda.yaml", "w") as f: + f.write( + "name: ml-project\n" + "channels:\n" + " - conda-forge\n" + "dependencies:\n" + " - python=3.9\n" + ) + with open(f"{path}/train.py", "w") as f: + f.write("# MLFlow training script\n") diff --git a/src/projspec/proj/documentation.py b/src/projspec/proj/documentation.py index c9fc7fb..77d018b 100644 --- a/src/projspec/proj/documentation.py +++ b/src/projspec/proj/documentation.py @@ -263,11 +263,7 @@ def _create(path: str) -> None: class MkDocs(ProjectSpec): - """MkDocs documentation project. - - Detected by ``mkdocs.yml`` or ``mkdocs.yaml`` at the project root, when - not already covered by the ReadTheDocs spec. - """ + """MkDocs documentation project.""" spec_doc = "https://www.mkdocs.org/user-guide/configuration/" @@ -338,10 +334,7 @@ def _create(path: str) -> None: class Sphinx(ProjectSpec): - """Sphinx documentation project (standalone, without ReadTheDocs config). - - Detected by ``conf.py`` in the project root or in a ``docs/`` subdirectory. - """ + """Sphinx documentation project (standalone, without ReadTheDocs config).""" spec_doc = "https://www.sphinx-doc.org/en/master/usage/configuration.html" @@ -421,11 +414,7 @@ def _create(path: str) -> None: class Docusaurus(ProjectSpec): - """Docusaurus documentation/website project. - - Detected by ``docusaurus.config.js``, ``docusaurus.config.ts``, or - ``docusaurus.config.mjs`` at the project root. - """ + """Docusaurus documentation/website project.""" spec_doc = "https://docusaurus.io/docs/configuration" diff --git a/src/projspec/proj/infra.py b/src/projspec/proj/infra.py index 866fe33..caab443 100644 --- a/src/projspec/proj/infra.py +++ b/src/projspec/proj/infra.py @@ -12,8 +12,8 @@ class DockerCompose(ProjectSpec): """Docker Compose multi-service project. - Detected by ``docker-compose.yml``, ``docker-compose.yaml``, or ``compose.yaml`` - at the project root. 
+ Designed to launch a set of runtimes (specific images with config), volumes + and networks, and expose ports. """ spec_doc = "https://docs.docker.com/reference/compose-file/" @@ -89,10 +89,7 @@ def _create(path: str) -> None: class Terraform(ProjectSpec): - """Terraform infrastructure-as-code project. - - Detected by any ``.tf`` file at the project root. - """ + """Terraform infrastructure-as-code project.""" spec_doc = "https://developer.hashicorp.com/terraform/language" @@ -180,11 +177,7 @@ def _create(path: str) -> None: class Ansible(ProjectSpec): - """Ansible automation project. - - Detected by ``playbook.yml`` / ``site.yml`` or ``ansible.cfg`` at the root, - or a ``roles/`` directory combined with a YAML playbook. - """ + """Ansible automation project.""" spec_doc = "https://docs.ansible.com/ansible/latest/reference_appendices/playbooks_keywords.html" @@ -247,10 +240,7 @@ def _create(path: str) -> None: class Pulumi(ProjectSpec): - """Pulumi infrastructure-as-code project. - - Detected by ``Pulumi.yaml`` or ``Pulumi.yml`` at the project root. - """ + """Pulumi infrastructure-as-code project.""" spec_doc = "https://www.pulumi.com/docs/reference/pulumi-yaml/" @@ -323,10 +313,7 @@ def _create(path: str) -> None: class CDK(ProjectSpec): - """AWS Cloud Development Kit (CDK) project. - - Detected by ``cdk.json`` at the project root. - """ + """AWS Cloud Development Kit (CDK) project.""" spec_doc = "https://docs.aws.amazon.com/cdk/v2/guide/projects.html" @@ -385,10 +372,7 @@ def _create(path: str) -> None: class Earthfile(ProjectSpec): - """Earthly build project. - - Detected by ``Earthfile`` at the project root. - """ + """Earthly build project.""" spec_doc = "https://docs.earthly.dev/docs/earthfile" @@ -446,10 +430,7 @@ def _create(path: str) -> None: class Nixpacks(ProjectSpec): - """Nixpacks build configuration project. - - Detected by ``nixpacks.toml`` at the project root. 
- """ + """Nixpacks build configuration project.""" spec_doc = "https://nixpacks.com/docs/configuration/file" @@ -509,10 +490,7 @@ def _create(path: str) -> None: class Vagrant(ProjectSpec): - """Vagrant virtual machine project. - - Detected by ``Vagrantfile`` at the project root. - """ + """Vagrant virtual machine project.""" spec_doc = "https://developer.hashicorp.com/vagrant/docs/vagrantfile" diff --git a/src/projspec/proj/jsframeworks.py b/src/projspec/proj/jsframeworks.py index f464b7a..d7b6744 100644 --- a/src/projspec/proj/jsframeworks.py +++ b/src/projspec/proj/jsframeworks.py @@ -7,17 +7,8 @@ from projspec.utils import AttrDict, run_subprocess -# --------------------------------------------------------------------------- -# Framework specs built on top of Node -# --------------------------------------------------------------------------- - - class NextJS(Node): - """Next.js React framework project. - - Detected by the presence of ``next.config.js``, ``next.config.mjs``, or - ``next.config.ts`` at the project root. - """ + """Next.js React framework project.""" spec_doc = "https://nextjs.org/docs/app/api-reference/config/next-config-js" @@ -74,10 +65,7 @@ def _create(path: str) -> None: class NuxtJS(Node): - """Nuxt.js Vue framework project. - - Detected by ``nuxt.config.js``, ``nuxt.config.ts``, or ``nuxt.config.mjs``. - """ + """Nuxt.js Vue framework project.""" spec_doc = "https://nuxt.com/docs/api/nuxt-config" @@ -125,10 +113,7 @@ def _create(path: str) -> None: class SvelteKit(Node): - """SvelteKit project. - - Detected by ``svelte.config.js`` or ``svelte.config.ts``. - """ + """SvelteKit project.""" spec_doc = "https://svelte.dev/docs/kit/configuration" @@ -179,8 +164,7 @@ def _create(path: str) -> None: class Vite(Node): """Vite-based project (any frontend framework using Vite as the build tool). - Detected by ``vite.config.js``, ``vite.config.ts``, ``vite.config.mjs``, - or ``vite.config.cjs``. 
Note: SvelteKit also has a svelte.config, so + Note: SvelteKit also has a svelte.config, so SvelteKit takes priority via its more-specific match. """ @@ -244,16 +228,8 @@ def _create(path: str) -> None: ) -# --------------------------------------------------------------------------- -# Alternative Node package managers as standalone specs -# --------------------------------------------------------------------------- - - class Pnpm(Node): - """Node project managed with pnpm. - - Detected by ``pnpm-lock.yaml`` at the project root. - """ + """Node project managed with pnpm.""" spec_doc = "https://pnpm.io/package_json" @@ -298,10 +274,7 @@ def _create(path: str) -> None: class Bun(Node): - """Node project managed with Bun. - - Detected by ``bun.lock`` or the legacy ``bun.lockb`` at the project root. - """ + """Node project managed with Bun.""" spec_doc = "https://bun.sh/docs/install/lockfile" @@ -325,15 +298,10 @@ def _create(path: str) -> None: run_subprocess(["bun", "init", "-y"], cwd=path, output=False) -# --------------------------------------------------------------------------- -# Deno — a separate runtime, not Node -# --------------------------------------------------------------------------- - - class Deno(ProjectSpec): """Deno project. - Detected by ``deno.json`` or ``deno.jsonc`` at the project root. + Note: this is a separate runtime, not a Node project. 
""" spec_doc = "https://docs.deno.com/runtime/fundamentals/configuration/" diff --git a/src/projspec/proj/workflows.py b/src/projspec/proj/workflows.py deleted file mode 100644 index 8660f1d..0000000 --- a/src/projspec/proj/workflows.py +++ /dev/null @@ -1,91 +0,0 @@ -import yaml - -from projspec.proj import ProjectSpec -from projspec.utils import AttrDict - - -class MLFlow(ProjectSpec): - spec_doc = ( - "https://mlflow.org/docs/latest/ml/projects/#mlproject-file-configuration" - ) - - def match(self) -> bool: - return "MLFlow" in self.proj.basenames - - def parse(self) -> None: - from projspec.content.environment import Environment, Precision, Stack - from projspec.artifact.process import Process - from projspec.content.executable import Command - - with self.proj.fs.open(self.proj.basenames["MLFlow"], "rt") as f: - meta = yaml.safe_load(f) - if "python_env" in meta: - with self.get_file(meta["python_env"], text=True) as f: - env = yaml.safe_load(f) - self.contents["environment"] = Environment( - stack=Stack.PIP, - precision=Precision.SPEC, - packages=env.get("dependencies", []) - + [f"python {env.get('python', '')}"], - proj=self.proj, - ) - elif "conda_env" in meta: - with self.get_file(meta["conda_env"], text=True) as f: - env = yaml.safe_load(f) - self.contents["environment"] = Environment( - stack=Stack.CONDA, - precision=Precision.SPEC, - packages=env.get("dependencies", []), - channels=env.get("channels"), - proj=self.proj, - ) - for name, cmd in meta.get("entry_points", {}).items(): - self.contents.setdefault("command", AttrDict())[name] = Command( - proj=self.proj, cmd=cmd["command"] - ) - self.artifacts.setdefault("process", AttrDict())[name] = Process( - proj=self.proj, cmd=["mlflow", "run", ".", "-e", name] - ) - - @staticmethod - def _create(path: str) -> None: - with open(f"{path}/MLFlow", "w") as f: - # https://github.com/mlflow/mlflow-example - f.write( - """ -name: tutorial - -conda_env: conda.yaml - -entry_points: - main: - parameters: - alpha: 
{type: float, default: 0.5} - l1_ratio: {type: float, default: 0.1} - command: "python train.py {alpha} {l1_ratio}" -""" - ) - with open(f"{path}/conda.yaml", "w") as f: - f.write( - """ -name: ml-project -channels: - - conda-forge -dependencies: - - python=3.9 -""" - ) - with open(f"{path}/train.py", "w") as f: - f.write( - """ -# MLFlow code -""" - ) - - -# TODO: prefect https://docs.prefect.io/v3/how-to-guides/configuration/ -# manage-settings#configure-settings-for-a-project - -# TODO: apache airflow? (is complex!) - -# TODO: dbt https://docs.getdbt.com/reference/dbt_project.yml From 9c3390e4f804f19a9e50a14531283cb725371c6d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 17 Apr 2026 14:33:54 -0400 Subject: [PATCH 6/9] Add install suggestions --- src/projspec/tools.py | 286 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) diff --git a/src/projspec/tools.py b/src/projspec/tools.py index 269d21e..ef06e40 100644 --- a/src/projspec/tools.py +++ b/src/projspec/tools.py @@ -292,6 +292,292 @@ class ToolInfo: "uv add mlflow", ], ), + # ------------------------------------------------------------------ + # CI/CD task runners + # ------------------------------------------------------------------ + ToolInfo( + name="task", + description="Task runner / build tool using Taskfile.yml (go-task).", + install_suggestions=[ + "brew install go-task", + "conda install -c conda-forge go-task", + 'sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin', + "winget install --id=Task.Task", + "https://taskfile.dev/installation/", + ], + ), + ToolInfo( + name="just", + description="Command runner for project-specific scripts defined in a justfile.", + install_suggestions=[ + "brew install just", + "conda install -c conda-forge just", + "cargo install just", + "winget install --id=Casey.Just", + "https://github.com/casey/just#installation", + ], + ), + ToolInfo( + name="tox", + description="Generic Python test automation and 
virtualenv management tool.", + install_suggestions=[ + "pip install tox", + "pipx install tox", + "conda install -c conda-forge tox", + "uv tool install tox", + ], + ), + ToolInfo( + name="nox", + description="Flexible Python test automation, similar to tox but using plain Python.", + install_suggestions=[ + "pip install nox", + "pipx install nox", + "conda install -c conda-forge nox", + "uv tool install nox", + ], + ), + # ------------------------------------------------------------------ + # Data / ML workflow tools + # ------------------------------------------------------------------ + ToolInfo( + name="dbt", + description="Data transformation tool that runs SQL models against a data warehouse.", + install_suggestions=[ + "pip install dbt-core", + "uv add dbt-core", + "conda install -c conda-forge dbt-core", + "https://docs.getdbt.com/docs/core/installation-overview", + ], + ), + ToolInfo( + name="quarto", + description="Open-source scientific and technical publishing system.", + install_suggestions=[ + "https://quarto.org/docs/get-started/", + "brew install --cask quarto", + "conda install -c conda-forge quarto", + "winget install --id=Posit.Quarto", + ], + ), + ToolInfo( + name="prefect", + description="Workflow orchestration platform for data and ML pipelines.", + install_suggestions=[ + "pip install prefect", + "uv add prefect", + "conda install -c conda-forge prefect", + ], + ), + ToolInfo( + name="dagster", + description="Cloud-native data orchestration platform for data pipelines.", + install_suggestions=[ + "pip install dagster dagster-webserver", + "uv add dagster dagster-webserver", + "conda install -c conda-forge dagster", + ], + ), + ToolInfo( + name="kedro", + description="Framework for creating reproducible, maintainable data science pipelines.", + install_suggestions=[ + "pip install kedro", + "uv add kedro", + "conda install -c conda-forge kedro", + "pipx install kedro", + ], + ), + ToolInfo( + name="airflow", + description="Platform for 
programmatically authoring, scheduling, and monitoring workflows.", + install_suggestions=[ + "pip install apache-airflow", + "uv add apache-airflow", + "conda install -c conda-forge apache-airflow", + "https://airflow.apache.org/docs/apache-airflow/stable/installation/", + ], + ), + ToolInfo( + name="snakemake", + description="Workflow management system for reproducible and scalable data analyses.", + install_suggestions=[ + "pip install snakemake", + "conda install -c conda-forge -c bioconda snakemake", + "uv add snakemake", + "mamba install -c conda-forge -c bioconda snakemake", + ], + ), + # ------------------------------------------------------------------ + # Documentation tools + # ------------------------------------------------------------------ + ToolInfo( + name="mkdocs", + description="Static site generator for project documentation, written in Python.", + install_suggestions=[ + "pip install mkdocs", + "uv add mkdocs", + "conda install -c conda-forge mkdocs", + "pipx install mkdocs", + ], + ), + ToolInfo( + name="sphinx-build", + description="Sphinx documentation builder (invoked as sphinx-build).", + install_suggestions=[ + "pip install sphinx", + "uv add sphinx", + "conda install -c conda-forge sphinx", + "pipx install sphinx", + ], + ), + ToolInfo( + name="sphinx-autobuild", + description="Live-reloading Sphinx documentation server.", + install_suggestions=[ + "pip install sphinx-autobuild", + "uv add sphinx-autobuild", + "conda install -c conda-forge sphinx-autobuild", + ], + ), + ToolInfo( + name="mdbook", + description="Utility to create modern online books from Markdown files (used by the Rust project).", + install_suggestions=[ + "cargo install mdbook", + "brew install mdbook", + "conda install -c conda-forge mdbook", + "https://rust-lang.github.io/mdBook/guide/installation.html", + ], + ), + # ------------------------------------------------------------------ + # Infrastructure / IaC tools + # 
------------------------------------------------------------------ + ToolInfo( + name="terraform", + description="Infrastructure as Code tool by HashiCorp for provisioning cloud resources.", + install_suggestions=[ + "brew install terraform", + "conda install -c conda-forge terraform", + "winget install --id=Hashicorp.Terraform", + "https://developer.hashicorp.com/terraform/install", + ], + ), + ToolInfo( + name="ansible-playbook", + description="Ansible playbook runner for automating configuration and deployment.", + install_suggestions=[ + "pip install ansible", + "uv add ansible", + "conda install -c conda-forge ansible", + "brew install ansible", + "pipx install ansible", + ], + ), + ToolInfo( + name="pulumi", + description="Infrastructure as Code platform supporting multiple languages.", + install_suggestions=[ + "curl -fsSL https://get.pulumi.com | sh", + "brew install pulumi/tap/pulumi", + "conda install -c conda-forge pulumi", + "winget install --id=Pulumi.Pulumi", + "https://www.pulumi.com/docs/install/", + ], + ), + ToolInfo( + name="cdk", + description="AWS Cloud Development Kit CLI for defining cloud infrastructure in code.", + install_suggestions=[ + "npm install -g aws-cdk", + "npx aws-cdk@latest", + "https://docs.aws.amazon.com/cdk/v2/guide/getting-started.html", + ], + ), + ToolInfo( + name="earthly", + description="Build automation tool combining Makefile and Dockerfile syntax.", + install_suggestions=[ + "brew install earthly/earthly/earthly", + "sudo /bin/sh -c 'wget https://github.com/earthly/earthly/releases/latest/download/earthly-linux-amd64 -O /usr/local/bin/earthly && chmod +x /usr/local/bin/earthly'", + "winget install --id=Earthly.Earthly", + "https://earthly.dev/get-earthly", + ], + ), + ToolInfo( + name="nixpacks", + description="Build app source code into OCI images using Nix, without a Dockerfile.", + install_suggestions=[ + "curl -sSL https://nixpacks.com/install.sh | bash", + "brew install railwayapp/tap/nixpacks", + 
"https://nixpacks.com/docs/getting-started", + ], + ), + ToolInfo( + name="vagrant", + description="Tool for building and managing portable virtual machine environments.", + install_suggestions=[ + "brew install --cask vagrant", + "winget install --id=Hashicorp.Vagrant", + "conda install -c conda-forge vagrant", + "https://developer.hashicorp.com/vagrant/install", + ], + ), + # ------------------------------------------------------------------ + # JavaScript / Node alternative runtimes and package managers + # ------------------------------------------------------------------ + ToolInfo( + name="pnpm", + description="Fast, disk-efficient Node.js package manager.", + install_suggestions=[ + "npm install -g pnpm", + "brew install pnpm", + "winget install --id=pnpm.pnpm", + "https://pnpm.io/installation", + ], + ), + ToolInfo( + name="bun", + description="Fast all-in-one JavaScript runtime, bundler, and package manager.", + install_suggestions=[ + "curl -fsSL https://bun.sh/install | bash", + "brew install oven-sh/bun/bun", + "winget install --id=Oven-sh.Bun", + "https://bun.sh/docs/installation", + ], + ), + ToolInfo( + name="deno", + description="Secure JavaScript/TypeScript runtime built on V8.", + install_suggestions=[ + "curl -fsSL https://deno.land/install.sh | sh", + "brew install deno", + "conda install -c conda-forge deno", + "winget install --id=DenoLand.Deno", + "https://deno.com/manual/getting_started/installation", + ], + ), + ToolInfo( + name="npx", + description="Node.js package runner bundled with npm; executes packages without installing.", + install_suggestions=[ + "https://nodejs.org/en/download/", + "nvm install --lts", + "conda install -c conda-forge nodejs", + ], + ), + # ------------------------------------------------------------------ + # Web app frameworks (Python) + # ------------------------------------------------------------------ + ToolInfo( + name="shiny", + description="Shiny for Python — build interactive web apps from Python 
scripts.", + install_suggestions=[ + "pip install shiny", + "uv add shiny", + "conda install -c conda-forge shiny", + ], + ), ] } From 7fa099f3a780ca3181c6e23fac790c4ebcddea37 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 17 Apr 2026 15:40:48 -0400 Subject: [PATCH 7/9] remove raw http suggestions (but I see we need to add mode options in general) --- src/projspec/tools.py | 46 ++++++------------------------------------- 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/src/projspec/tools.py b/src/projspec/tools.py index ef06e40..6fbb72c 100644 --- a/src/projspec/tools.py +++ b/src/projspec/tools.py @@ -45,7 +45,6 @@ class ToolInfo: "uv python install 3.12", "conda install python=3.12", "brew install python", - "https://www.python.org/downloads/", "winget install --id=Python.Python.3", ], ), @@ -77,10 +76,13 @@ class ToolInfo: name="conda", description="Cross-platform package and environment manager (Anaconda/Miniconda/Miniforge).", install_suggestions=[ - "https://github.com/conda-forge/miniforge#install", - "https://docs.conda.io/en/latest/miniconda.html", "brew install --cask miniforge", "winget install --id=Anaconda.Miniconda3", + ( + "mkdir -p ~/miniconda3 && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh " + "&& bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 && rm -rf ~/miniconda3/miniconda.sh " + "&& ~/miniconda3/bin/conda init bash" + ), ], ), ToolInfo( @@ -98,7 +100,6 @@ class ToolInfo: "conda install -c conda-forge rattler-build", "cargo install rattler-build", "brew install rattler-build", - "https://github.com/prefix-dev/rattler-build/releases", ], ), ToolInfo( @@ -118,7 +119,6 @@ class ToolInfo: name="docker", description="Container platform for building, shipping, and running applications.", install_suggestions=[ - "https://www.docker.com/products/docker-desktop/", "brew install --cask docker", "sudo apt-get install docker-ce docker-ce-cli containerd.io", "sudo dnf 
install docker-ce docker-ce-cli containerd.io", @@ -134,7 +134,6 @@ class ToolInfo: "curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash && nvm install --lts", "conda install -c conda-forge nodejs", "brew install node", - "https://nodejs.org/en/download/", "winget install --id=OpenJS.NodeJS", ], ), @@ -142,8 +141,8 @@ class ToolInfo: name="npm", description="Default package manager bundled with Node.js.", install_suggestions=[ - "https://nodejs.org/en/download/", "nvm install --lts", + "brew install node", "conda install -c conda-forge nodejs", ], ), @@ -154,7 +153,6 @@ class ToolInfo: "npm install -g yarn", "brew install yarn", "conda install -c conda-forge yarn", - "https://yarnpkg.com/getting-started/install", ], ), ToolInfo( @@ -210,7 +208,6 @@ class ToolInfo: "sudo dnf install git", "conda install -c conda-forge git", "winget install --id=Git.Git", - "https://git-scm.com/downloads", ], ), # ------------------------------------------------------------------ @@ -280,9 +277,6 @@ class ToolInfo: "conda install -c conda-forge briefcase", ], ), - # ------------------------------------------------------------------ - # MLOps - # ------------------------------------------------------------------ ToolInfo( name="mlflow", description="Open-source platform for managing the ML lifecycle.", @@ -292,9 +286,6 @@ class ToolInfo: "uv add mlflow", ], ), - # ------------------------------------------------------------------ - # CI/CD task runners - # ------------------------------------------------------------------ ToolInfo( name="task", description="Task runner / build tool using Taskfile.yml (go-task).", @@ -303,7 +294,6 @@ class ToolInfo: "conda install -c conda-forge go-task", 'sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin', "winget install --id=Task.Task", - "https://taskfile.dev/installation/", ], ), ToolInfo( @@ -314,7 +304,6 @@ class ToolInfo: "conda install -c conda-forge just", "cargo install just", 
"winget install --id=Casey.Just", - "https://github.com/casey/just#installation", ], ), ToolInfo( @@ -337,9 +326,6 @@ class ToolInfo: "uv tool install nox", ], ), - # ------------------------------------------------------------------ - # Data / ML workflow tools - # ------------------------------------------------------------------ ToolInfo( name="dbt", description="Data transformation tool that runs SQL models against a data warehouse.", @@ -347,14 +333,12 @@ class ToolInfo: "pip install dbt-core", "uv add dbt-core", "conda install -c conda-forge dbt-core", - "https://docs.getdbt.com/docs/core/installation-overview", ], ), ToolInfo( name="quarto", description="Open-source scientific and technical publishing system.", install_suggestions=[ - "https://quarto.org/docs/get-started/", "brew install --cask quarto", "conda install -c conda-forge quarto", "winget install --id=Posit.Quarto", @@ -395,7 +379,6 @@ class ToolInfo: "pip install apache-airflow", "uv add apache-airflow", "conda install -c conda-forge apache-airflow", - "https://airflow.apache.org/docs/apache-airflow/stable/installation/", ], ), ToolInfo( @@ -447,7 +430,6 @@ class ToolInfo: "cargo install mdbook", "brew install mdbook", "conda install -c conda-forge mdbook", - "https://rust-lang.github.io/mdBook/guide/installation.html", ], ), # ------------------------------------------------------------------ @@ -460,7 +442,6 @@ class ToolInfo: "brew install terraform", "conda install -c conda-forge terraform", "winget install --id=Hashicorp.Terraform", - "https://developer.hashicorp.com/terraform/install", ], ), ToolInfo( @@ -482,7 +463,6 @@ class ToolInfo: "brew install pulumi/tap/pulumi", "conda install -c conda-forge pulumi", "winget install --id=Pulumi.Pulumi", - "https://www.pulumi.com/docs/install/", ], ), ToolInfo( @@ -491,7 +471,6 @@ class ToolInfo: install_suggestions=[ "npm install -g aws-cdk", "npx aws-cdk@latest", - "https://docs.aws.amazon.com/cdk/v2/guide/getting-started.html", ], ), ToolInfo( @@ 
-501,7 +480,6 @@ class ToolInfo: "brew install earthly/earthly/earthly", "sudo /bin/sh -c 'wget https://github.com/earthly/earthly/releases/latest/download/earthly-linux-amd64 -O /usr/local/bin/earthly && chmod +x /usr/local/bin/earthly'", "winget install --id=Earthly.Earthly", - "https://earthly.dev/get-earthly", ], ), ToolInfo( @@ -510,7 +488,6 @@ class ToolInfo: install_suggestions=[ "curl -sSL https://nixpacks.com/install.sh | bash", "brew install railwayapp/tap/nixpacks", - "https://nixpacks.com/docs/getting-started", ], ), ToolInfo( @@ -520,12 +497,8 @@ class ToolInfo: "brew install --cask vagrant", "winget install --id=Hashicorp.Vagrant", "conda install -c conda-forge vagrant", - "https://developer.hashicorp.com/vagrant/install", ], ), - # ------------------------------------------------------------------ - # JavaScript / Node alternative runtimes and package managers - # ------------------------------------------------------------------ ToolInfo( name="pnpm", description="Fast, disk-efficient Node.js package manager.", @@ -533,7 +506,6 @@ class ToolInfo: "npm install -g pnpm", "brew install pnpm", "winget install --id=pnpm.pnpm", - "https://pnpm.io/installation", ], ), ToolInfo( @@ -543,7 +515,6 @@ class ToolInfo: "curl -fsSL https://bun.sh/install | bash", "brew install oven-sh/bun/bun", "winget install --id=Oven-sh.Bun", - "https://bun.sh/docs/installation", ], ), ToolInfo( @@ -554,21 +525,16 @@ class ToolInfo: "brew install deno", "conda install -c conda-forge deno", "winget install --id=DenoLand.Deno", - "https://deno.com/manual/getting_started/installation", ], ), ToolInfo( name="npx", description="Node.js package runner bundled with npm; executes packages without installing.", install_suggestions=[ - "https://nodejs.org/en/download/", "nvm install --lts", "conda install -c conda-forge nodejs", ], ), - # ------------------------------------------------------------------ - # Web app frameworks (Python) - # 
------------------------------------------------------------------ ToolInfo( name="shiny", description="Shiny for Python — build interactive web apps from Python scripts.", From dca1bc2591b63c0b635830b181efe351c5ece45f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 17 Apr 2026 16:06:24 -0400 Subject: [PATCH 8/9] remove unneeded comments --- src/projspec/tools.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/projspec/tools.py b/src/projspec/tools.py index 6fbb72c..3dbf10d 100644 --- a/src/projspec/tools.py +++ b/src/projspec/tools.py @@ -24,9 +24,6 @@ class ToolInfo: TOOLS: dict[str, ToolInfo] = { t.name: t for t in [ - # ------------------------------------------------------------------ - # Python ecosystem - # ------------------------------------------------------------------ ToolInfo( name="uv", description="Extremely fast Python package and project manager (pip/venv/build replacement).", @@ -69,9 +66,6 @@ class ToolInfo: "pipx install pre-commit", ], ), - # ------------------------------------------------------------------ - # Conda ecosystem - # ------------------------------------------------------------------ ToolInfo( name="conda", description="Cross-platform package and environment manager (Anaconda/Miniconda/Miniforge).", @@ -112,9 +106,6 @@ class ToolInfo: "conda install -c conda-forge pixi", ], ), - # ------------------------------------------------------------------ - # Containers - # ------------------------------------------------------------------ ToolInfo( name="docker", description="Container platform for building, shipping, and running applications.", @@ -124,9 +115,6 @@ class ToolInfo: "sudo dnf install docker-ce docker-ce-cli containerd.io", ], ), - # ------------------------------------------------------------------ - # Node / JavaScript ecosystem - # ------------------------------------------------------------------ ToolInfo( name="node", description="JavaScript runtime environment (Node.js).", @@ 
-173,9 +161,6 @@ class ToolInfo: "brew install copier", ], ), - # ------------------------------------------------------------------ - # Rust ecosystem - # ------------------------------------------------------------------ ToolInfo( name="cargo", description="Rust package manager and build tool.", @@ -196,9 +181,6 @@ class ToolInfo: "cargo install maturin", ], ), - # ------------------------------------------------------------------ - # Version control - # ------------------------------------------------------------------ ToolInfo( name="git", description="Distributed version control system.", @@ -210,9 +192,6 @@ class ToolInfo: "winget install --id=Git.Git", ], ), - # ------------------------------------------------------------------ - # Web frameworks / app runners - # ------------------------------------------------------------------ ToolInfo( name="streamlit", description="Framework for turning Python scripts into shareable web apps.", @@ -391,9 +370,6 @@ class ToolInfo: "mamba install -c conda-forge -c bioconda snakemake", ], ), - # ------------------------------------------------------------------ - # Documentation tools - # ------------------------------------------------------------------ ToolInfo( name="mkdocs", description="Static site generator for project documentation, written in Python.", @@ -432,9 +408,6 @@ class ToolInfo: "conda install -c conda-forge mdbook", ], ), - # ------------------------------------------------------------------ - # Infrastructure / IaC tools - # ------------------------------------------------------------------ ToolInfo( name="terraform", description="Infrastructure as Code tool by HashiCorp for provisioning cloud resources.", From 8d6ed2a9f3f02c606fc6ab3067452f03d583f780 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 17 Apr 2026 18:39:04 -0400 Subject: [PATCH 9/9] Add tests --- src/projspec/config.py | 5 + src/projspec/tools.py | 187 +++- src/projspec/utils.py | 7 +- tests/test_new_specs.py | 2018 
+++++++++++++++++++++++++++++++++++++++ tests/test_tools.py | 356 +++++++ 5 files changed, 2569 insertions(+), 4 deletions(-) create mode 100644 tests/test_new_specs.py create mode 100644 tests/test_tools.py diff --git a/src/projspec/config.py b/src/projspec/config.py index 11cbee2..5d83d70 100644 --- a/src/projspec/config.py +++ b/src/projspec/config.py @@ -20,6 +20,7 @@ def defaults(): "scan_max_size": 5 * 2**10, "remote_artifact_status": False, "capture_artifact_output": True, + "preferred_install_methods": ["conda", "pip"], } @@ -33,6 +34,10 @@ def defaults(): "if True, capture and enqueue output from spawned Process artifacts. " "Otherwise, output appears on stdout/err." ), + "preferred_install_methods": ( + "ordered list of preferred installer names for install_tool(), " + "e.g. ['uv', 'conda', 'pip']. Empty list uses the platform default." + ), } diff --git a/src/projspec/tools.py b/src/projspec/tools.py index 3dbf10d..6d8ff63 100644 --- a/src/projspec/tools.py +++ b/src/projspec/tools.py @@ -1,5 +1,12 @@ +from __future__ import annotations + +import platform +import subprocess +import sys from dataclasses import dataclass, field +from projspec.utils import is_installed + @dataclass class ToolInfo: @@ -527,7 +534,7 @@ def suggest(tool_name: str) -> str: Parameters ---------- tool_name: - The executable name as it appears in ``TOOLS`` (e.g. ``"uv"``). + The executable name as it appears in `TOOLS` (e.g. `"uv"`). 
Returns ------- @@ -557,3 +564,181 @@ def suggest(tool_name: str) -> str: for command in info.install_suggestions: lines.append(f" {command}") return "\n".join(lines) + + +def _is_url(s: str) -> bool: + return s.startswith(("https://", "http://")) + + +def _is_shell_string(s: str) -> bool: + """True when *s* requires a POSIX shell (contains a pipe, redirect, etc.).""" + return any(ch in s for ch in ("|", ">", "<", "&&", ";")) + + +def _leading_executable(s: str) -> str: + """Return the first word of an install string (the executable to invoke).""" + return s.split()[0] if s.split() else "" + + +# Platform names returned by sys.platform +_WINDOWS_PLATFORMS = {"win32", "cygwin", "msys"} +_IS_POSIX = sys.platform not in _WINDOWS_PLATFORMS +_CURRENT_PLATFORM = sys.platform if _IS_POSIX else "windows" + + +def _method_is_viable(install_string: str) -> bool: + """Return True when *install_string* can in principle be run on this machine. + + Rules: + - URL strings are never directly executable. + - `winget` strings are only viable on Windows. + - Shell one-liners (containing `|`, `>`, etc.) are only viable on POSIX. + - `brew` is only viable when Homebrew is present (i.e. on macOS/Linux with + Homebrew installed). + - For everything else: viable if the leading executable exists on PATH. + """ + if _is_url(install_string): + return False + + # winget is Windows-only + if _leading_executable(install_string) == "winget": + return not _IS_POSIX + + # Shell one-liners require a POSIX shell + if _is_shell_string(install_string): + return _IS_POSIX and _leading_executable(install_string) in is_installed + + exe = _leading_executable(install_string) + return exe in is_installed + + +def _preferred_install_methods() -> list[str]: + """Return the user-configured ordered preference list of installer names. + + Reads `preferred_install_methods` from projspec config (a list of + installer executable names, e.g. `["uv", "conda", "pip"]`). 
Falls back + to a sensible platform-appropriate default ordering when not configured. + """ + from projspec.config import get_conf + + user = get_conf("preferred_install_methods") + + # Sensible first line install options + defaults: list[str] = [ + "uv", + "conda", + "mamba", + "pip", + "pip3", + "pipx", + "cargo", + "npm", + "npx", + ] + if not _IS_POSIX: + defaults.append("winget") + # shell one-liners come last among executable methods + defaults += ["brew", "curl", "sh", "bash", "sudo"] + + actual = user + [_ for _ in defaults if _ not in user] + return actual + + +def _rank_install_string(s: str, preference_order: list[str]) -> tuple[int, int]: + """Return a (preference_rank, original_index) sort key for *s*. + + Lower is better. Strings whose leading executable appears earlier in + *preference_order* sort first. URLs and shell strings without a + recognisable leading executable go to the end. + """ + exe = _leading_executable(s) + try: + rank = preference_order.index(exe) + except ValueError: + rank = len(preference_order) + return rank + + +def choose_install_method(tool_name: str) -> str | None: + """Pick the best viable install method for *tool_name* on this machine. + + Selection algorithm + ------------------- + 1. Look up *tool_name* in :data:`TOOLS`. If not found, return `None`. + 2. Filter `install_suggestions` to those that are *viable* on the current + platform (see :func:`_method_is_viable`). + 3. Among the viable candidates, rank them by the ordered preference list + obtained from :func:`_preferred_install_methods` (which reads + `preferred_install_methods` from the projspec config, falling back to + a sensible platform default). + 4. Return the best-ranked candidate, or `None` if no viable candidate + exists. + + Parameters + ---------- + tool_name: + The executable name as it appears in :data:`TOOLS` (e.g. `"uv"`). + + Returns + ------- + str or None + The chosen install string (e.g. 
`"pip install uv"`), or `None` + when the tool is unknown or no viable install method was found. + """ + info = TOOLS.get(tool_name) + if info is None: + return None + + preference = _preferred_install_methods() + viable = [s for s in info.install_suggestions if _method_is_viable(s)] + if not viable: + return None + + return min(viable, key=lambda s: _rank_install_string(s, preference)) + + +def install_tool(tool_name: str) -> int: + """Install *tool_name* using the best available method for this machine. + + Selects an install command via :func:`choose_install_method`, then + executes it. Shell-style strings (those containing `|`, `>`, etc.) + are run with `subprocess.call(..., shell=True)`; regular space-separated + commands are split and passed as a list. + + Parameters + ---------- + tool_name: + The executable name as it appears in :data:`TOOLS` (e.g. `"uv"`). + + Returns + ------- + int + The exit code of the install command (0 = success). + + Raises + ------ + ValueError + If *tool_name* is not found in :data:`TOOLS`. + RuntimeError + If no viable install method exists for the current platform. + """ + info = TOOLS.get(tool_name) + if info is None: + raise ValueError( + f"Unknown tool {tool_name!r}. " f"Known tools: {', '.join(sorted(TOOLS))}" + ) + + method = choose_install_method(tool_name) + if method is None: + raise RuntimeError( + f"No viable install method found for {tool_name!r} " + f"on {_CURRENT_PLATFORM!r}. 
" + f"Available suggestions:\n" + + "\n".join(f" {s}" for s in info.install_suggestions) + ) + + if _is_shell_string(method): + # Shell one-liners must run in a POSIX shell + return subprocess.call(method, shell=True) + else: + return subprocess.call(method.split()) diff --git a/src/projspec/utils.py b/src/projspec/utils.py index 0fcea37..3917576 100644 --- a/src/projspec/utils.py +++ b/src/projspec/utils.py @@ -176,10 +176,11 @@ def __init__(self): self.env = _linked_local_path(sys.executable) def exists(self, cmd: str, refresh=False): - """Test if command can be called, by starting a subprocess + """Test if command can be called by starting a subprocess - This is more costly what some PATH lookup (i.e., what ``which()`` does), but also - more rigorous. + This is more costly what some PATH lookup (i.e., what `which()` does), but also + more rigorous. We cache the result - currently for the session, and + eventually persistently. """ if refresh or (self.env, cmd) not in self.cache: try: diff --git a/tests/test_new_specs.py b/tests/test_new_specs.py new file mode 100644 index 0000000..f41a6c5 --- /dev/null +++ b/tests/test_new_specs.py @@ -0,0 +1,2018 @@ +"""Tests for new project spec types added in the bulk expansion. + +Structure +--------- +Each spec family gets one class with: + - test_match_positive – the spec IS detected given the right files + - test_match_negative – the spec is NOT detected without those files + - test_parse_contents – expected content keys are present after parse() + - test_parse_artifacts – expected artifact keys are present after parse() + - (where applicable) test_parse_detail – spot-check on specific parsed values + +Helper +------ +``make_spec(cls, tmpdir, files)`` writes *files* (dict of rel-path → text) into +*tmpdir* and returns a freshly constructed spec instance with _contents and +_artifacts pre-initialised to None (matching the state before parse() is called +by ProjectSpec.__init__). 
def write_files(tmpdir, files: dict[str, str]) -> str:
    """Dedent and write every entry of *files* beneath *tmpdir*.

    Keys are relative paths (intermediate directories are created as
    needed); values are file bodies. Returns *tmpdir* as a string.
    """
    root = str(tmpdir)
    for relpath, body in files.items():
        destination = os.path.join(root, relpath)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        with open(destination, "w") as handle:
            handle.write(textwrap.dedent(body))
    return root


def make_proj(tmpdir, files: dict[str, str]):
    """Populate *tmpdir* with *files* and wrap it in a projspec.Project."""
    return projspec.Project(write_files(tmpdir, files))


def raw_spec(cls, proj):
    """Instantiate a spec bypassing __init__'s match() call, for manual testing."""
    # __new__ skips __init__, mirroring the pre-parse() state of ProjectSpec.
    instance = cls.__new__(cls)
    instance.proj = proj
    instance._contents = None
    instance._artifacts = None
    return instance
self.FILES) + from projspec.proj.cicd import GitHubActions + + spec = raw_spec(GitHubActions, proj) + spec.parse() + wf = list(spec._contents["ci_workflow"].values())[0] + assert wf.provider == "github" + assert "test" in wf.jobs + assert "push" in wf.triggers + + def test_multiple_workflows(self, tmpdir): + files = dict(self.FILES) + files[".github/workflows/release.yml"] = textwrap.dedent( + """\ + name: Release + on: [push] + jobs: + build: + runs-on: ubuntu-latest + steps: [] + """ + ) + proj = make_proj(tmpdir, files) + from projspec.proj.cicd import GitHubActions + + spec = raw_spec(GitHubActions, proj) + spec.parse() + assert len(spec._contents["ci_workflow"]) == 2 + + +class TestGitLabCI: + FILES = { + ".gitlab-ci.yml": """\ + stages: + - test + - deploy + test: + stage: test + script: + - pytest + deploy: + stage: deploy + script: + - echo deploy + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import GitLabCI + + assert raw_spec(GitLabCI, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import GitLabCI + + assert not raw_spec(GitLabCI, proj).match() + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import GitLabCI + + spec = raw_spec(GitLabCI, proj) + spec.parse() + wf = spec._contents["ci_workflow"] + assert wf.provider == "gitlab" + assert "test" in wf.jobs + assert "deploy" in wf.jobs + assert "test" in wf.triggers + + +class TestCircleCI: + FILES = { + ".circleci/config.yml": """\ + version: 2.1 + jobs: + build: + docker: + - image: cimg/python:3.11 + steps: + - checkout + - run: pytest + workflows: + main: + jobs: + - build + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import CircleCI + + assert raw_spec(CircleCI, proj).match() + + def test_match_negative(self, tmpdir): + proj = 
make_proj(tmpdir, {}) + from projspec.proj.cicd import CircleCI + + assert not raw_spec(CircleCI, proj).match() + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import CircleCI + + spec = raw_spec(CircleCI, proj) + spec.parse() + wf = spec._contents["ci_workflow"] + assert wf.provider == "circleci" + assert "build" in wf.jobs + + +class TestTaskfile: + FILES = { + "Taskfile.yml": """\ + version: '3' + tasks: + build: + desc: Build the project + cmds: + - echo building + test: + desc: Run tests + cmds: + - pytest + lint: + cmds: + - ruff check . + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import Taskfile + + assert raw_spec(Taskfile, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import Taskfile + + assert not raw_spec(Taskfile, proj).match() + + def test_match_variant_names(self, tmpdir): + for name in ("Taskfile.yaml", "taskfile.yml", "taskfile.yaml"): + proj = make_proj( + tmpdir, {name: "version: '3'\ntasks:\n x:\n cmds: [echo]\n"} + ) + from projspec.proj.cicd import Taskfile + + assert raw_spec(Taskfile, proj).match(), f"{name} should match" + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import Taskfile + + spec = raw_spec(Taskfile, proj) + spec.parse() + assert "build" in spec._contents["command"] + assert "test" in spec._contents["command"] + assert "lint" in spec._contents["command"] + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import Taskfile + + spec = raw_spec(Taskfile, proj) + spec.parse() + assert "build" in spec._artifacts["process"] + assert spec._artifacts["process"]["build"].cmd == ["task", "build"] + + +class TestJustFile: + FILES = { + "justfile": """\ + # Build the project + build: + cargo build --release + + # Run tests + 
test: + cargo test + + fmt: + cargo fmt + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import JustFile + + assert raw_spec(JustFile, proj).match() + + def test_match_Justfile_capitalised(self, tmpdir): + proj = make_proj(tmpdir, {"Justfile": "build:\n echo ok\n"}) + from projspec.proj.cicd import JustFile + + assert raw_spec(JustFile, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import JustFile + + assert not raw_spec(JustFile, proj).match() + + def test_parse_recipes(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import JustFile + + spec = raw_spec(JustFile, proj) + spec.parse() + assert "build" in spec._contents["command"] + assert "test" in spec._contents["command"] + assert "fmt" in spec._contents["command"] + assert spec._artifacts["process"]["build"].cmd == ["just", "build"] + + +class TestTox: + FILES_INI = { + "tox.ini": """\ + [tox] + envlist = py311, py312, lint + + [testenv] + deps = pytest + commands = pytest {posargs} + + [testenv:lint] + deps = ruff + commands = ruff check . 
+ """, + } + + FILES_PYPROJECT = { + "pyproject.toml": """\ + [tool.tox] + [tool.tox.env.py311] + commands = [["pytest"]] + [tool.tox.env.lint] + commands = [["ruff", "check", "."]] + """, + } + + def test_match_tox_ini(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_INI) + from projspec.proj.cicd import Tox + + assert raw_spec(Tox, proj).match() + + def test_match_pyproject(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PYPROJECT) + from projspec.proj.cicd import Tox + + assert raw_spec(Tox, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import Tox + + assert not raw_spec(Tox, proj).match() + + def test_parse_envlist(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_INI) + from projspec.proj.cicd import Tox + + spec = raw_spec(Tox, proj) + spec.parse() + assert "py311" in spec._artifacts["process"] + assert "py312" in spec._artifacts["process"] + assert "lint" in spec._artifacts["process"] + assert spec._artifacts["process"]["lint"].cmd == ["tox", "-e", "lint"] + + def test_parse_testenv_sections(self, tmpdir): + # tox.ini with named [testenv:X] sections but no envlist + proj = make_proj(tmpdir, {"tox.ini": "[testenv:unit]\ncommands=pytest\n"}) + from projspec.proj.cicd import Tox + + spec = raw_spec(Tox, proj) + spec.parse() + assert "unit" in spec._artifacts["process"] + + def test_parse_fallback_generic(self, tmpdir): + # tox.ini with no envlist and no [testenv:X] sections + proj = make_proj(tmpdir, {"tox.ini": "[tox]\n"}) + from projspec.proj.cicd import Tox + + spec = raw_spec(Tox, proj) + spec.parse() + assert "tox" in spec._artifacts["process"] + + +# --------------------------------------------------------------------------- +# Data / ML / Workflow specs +# --------------------------------------------------------------------------- + + +class TestDbt: + FILES = { + "dbt_project.yml": """\ + name: 'analytics' + version: '1.0.0' + config-version: 2 + profile: 'default' + 
model-paths: ['models'] + """, + "models/example.sql": "SELECT 1 AS id", + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + assert raw_spec(Dbt, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Dbt + + assert not raw_spec(Dbt, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + spec = raw_spec(Dbt, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["name"] == "analytics" + assert meta["profile"] == "default" + + def test_parse_standard_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + spec = raw_spec(Dbt, proj) + spec.parse() + for cmd in ("run", "test", "build", "compile", "seed"): + assert cmd in spec._contents["command"], f"missing command: {cmd}" + assert cmd in spec._artifacts["process"], f"missing artifact: {cmd}" + + def test_parse_command_values(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + spec = raw_spec(Dbt, proj) + spec.parse() + assert spec._contents["command"]["run"].cmd == ["dbt", "run"] + assert spec._artifacts["process"]["build"].cmd == ["dbt", "build"] + + +class TestQuarto: + FILES_PROJECT = { + "_quarto.yml": """\ + project: + type: website + title: My Quarto Site + output-dir: _site + format: + html: + theme: cosmo + """, + "index.qmd": "---\ntitle: Home\n---\nHello!\n", + } + + FILES_SINGLE_QMD = { + "report.qmd": "---\ntitle: Report\n---\nContent here.\n", + } + + def test_match_quarto_yml(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + assert raw_spec(Quarto, proj).match() + + def test_match_qmd_file(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_SINGLE_QMD) + 
from projspec.proj.dataworkflows import Quarto + + assert raw_spec(Quarto, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Quarto + + assert not raw_spec(Quarto, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + spec = raw_spec(Quarto, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["title"] == "My Quarto Site" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + spec = raw_spec(Quarto, proj) + spec.parse() + assert "render" in spec._artifacts + assert "preview" in spec._artifacts + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["render"], StaticSite) + assert isinstance(spec._artifacts["preview"], Server) + + def test_parse_custom_output_dir(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + spec = raw_spec(Quarto, proj) + spec.parse() + assert "_site" in spec._artifacts["render"].fn + + +class TestNox: + FILES = { + "noxfile.py": """\ + import nox + + @nox.session + def tests(session): + session.install('pytest') + session.run('pytest') + + @nox.session(python=['3.11', '3.12']) + def lint(session): + session.install('ruff') + session.run('ruff', 'check', '.') + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Nox + + assert raw_spec(Nox, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Nox + + assert not raw_spec(Nox, proj).match() + + def test_parse_sessions(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from 
projspec.proj.dataworkflows import Nox + + spec = raw_spec(Nox, proj) + spec.parse() + assert "tests" in spec._artifacts["process"] + assert "lint" in spec._artifacts["process"] + assert spec._artifacts["process"]["tests"].cmd == ["nox", "-s", "tests"] + + def test_parse_empty_noxfile_fallback(self, tmpdir): + proj = make_proj(tmpdir, {"noxfile.py": "# no sessions\n"}) + from projspec.proj.dataworkflows import Nox + + spec = raw_spec(Nox, proj) + spec.parse() + # Falls back to generic nox command + assert "nox" in spec._artifacts["process"] + + +class TestPrefect: + FILES = { + "prefect.yaml": """\ + name: my-pipeline + deployments: + - name: daily-etl + entrypoint: flows/etl.py:run_etl + - name: weekly-report + entrypoint: flows/report.py:run_report + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + assert raw_spec(Prefect, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Prefect + + assert not raw_spec(Prefect, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + spec = raw_spec(Prefect, proj) + spec.parse() + assert spec._contents["descriptive_metadata"].meta["name"] == "my-pipeline" + + def test_parse_deployments_as_stages(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + spec = raw_spec(Prefect, proj) + spec.parse() + assert "daily-etl" in spec._contents["pipeline_stage"] + assert "weekly-report" in spec._contents["pipeline_stage"] + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + spec = raw_spec(Prefect, proj) + spec.parse() + assert "run" in spec._artifacts["process"] + + +class TestSnakemake: + FILES = { + "Snakefile": """\ + rule all: + input: 
"results/output.txt" + + rule process: + input: "data/input.txt" + output: "results/output.txt" + shell: "cat {input} > {output}" + + rule download: + output: "data/input.txt" + shell: "echo hello > {output}" + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Snakemake + + assert raw_spec(Snakemake, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Snakemake + + assert not raw_spec(Snakemake, proj).match() + + def test_parse_rules_as_stages(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Snakemake + + spec = raw_spec(Snakemake, proj) + spec.parse() + stages = spec._contents.get("pipeline_stage", {}) + # 'all' is filtered out; process and download should appear + assert "process" in stages + assert "download" in stages + assert "all" not in stages + + def test_parse_run_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Snakemake + + spec = raw_spec(Snakemake, proj) + spec.parse() + assert "run" in spec._artifacts["process"] + assert spec._artifacts["process"]["run"].cmd == [ + "snakemake", + "--cores", + "all", + ] + + +class TestAirflow: + FILES = { + "dags/etl_dag.py": """\ + from airflow import DAG + from airflow.operators.python import PythonOperator + + dag = DAG(dag_id='etl_pipeline', schedule='@daily') + """, + "dags/report_dag.py": """\ + from airflow import DAG + dag = DAG(dag_id='weekly_report', schedule='@weekly') + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Airflow + + assert raw_spec(Airflow, proj).match() + + def test_match_negative_no_dags(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Airflow + + assert not raw_spec(Airflow, proj).match() + + def 
test_match_negative_empty_dags(self, tmpdir): + # dags/ exists but no .py files + os.makedirs(str(tmpdir.join("dags")), exist_ok=True) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Airflow + + assert not raw_spec(Airflow, proj).match() + + def test_parse_dag_ids_as_stages(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Airflow + + spec = raw_spec(Airflow, proj) + spec.parse() + stages = spec._contents.get("pipeline_stage", {}) + assert "etl_pipeline" in stages + assert "weekly_report" in stages + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Airflow + + spec = raw_spec(Airflow, proj) + spec.parse() + assert "standalone" in spec._contents["command"] + assert "webserver" in spec._contents["command"] + + +class TestKedro: + FILES = { + "pyproject.toml": """\ + [tool.kedro] + package_name = "my_project" + project_name = "My Project" + kedro_init_version = "0.19.0" + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Kedro + + assert raw_spec(Kedro, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Kedro + + assert not raw_spec(Kedro, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Kedro + + spec = raw_spec(Kedro, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["package_name"] == "my_project" + + def test_parse_default_run_command(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Kedro + + spec = raw_spec(Kedro, proj) + spec.parse() + assert "run" in spec._contents["command"] + assert "run" in spec._artifacts + + def test_parse_pipeline_discovery(self, tmpdir): + # Create pipeline 
directories under src/<package_name>/pipelines/ + files = dict(self.FILES) + files["src/my_project/pipelines/ingestion/__init__.py"] = "" + files["src/my_project/pipelines/processing/__init__.py"] = "" + proj = make_proj(tmpdir, files) + from projspec.proj.dataworkflows import Kedro + + spec = raw_spec(Kedro, proj) + spec.parse() + assert "ingestion" in spec._contents.get("pipeline_stage", {}) + assert "processing" in spec._contents.get("pipeline_stage", {}) + + +class TestDagster: + FILES_PYPROJECT = { + "pyproject.toml": """\ + [tool.dagster] + module_name = "my_assets" + """, + } + + FILES_YAML = { + "dagster.yaml": "telemetry:\n enabled: false\n", + } + + def test_match_pyproject(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PYPROJECT) + from projspec.proj.dataworkflows import Dagster + + assert raw_spec(Dagster, proj).match() + + def test_match_dagster_yaml(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_YAML) + from projspec.proj.dataworkflows import Dagster + + assert raw_spec(Dagster, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Dagster + + assert not raw_spec(Dagster, proj).match() + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PYPROJECT) + from projspec.proj.dataworkflows import Dagster + + spec = raw_spec(Dagster, proj) + spec.parse() + assert "dev" in spec._artifacts + assert "materialize" in spec._artifacts + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["dev"], Server) + + +# --------------------------------------------------------------------------- +# Documentation specs +# --------------------------------------------------------------------------- + + +class TestMkDocs: + FILES = { + "mkdocs.yml": """\ + site_name: My Project Docs + site_description: Documentation for my project + site_author: Alice + docs_dir: docs + site_dir: site + nav: + - Home: index.md + theme: + name: material + """, + 
"docs/index.md": "# Welcome\n", + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + assert raw_spec(MkDocs, proj).match() + + def test_match_yaml_extension(self, tmpdir): + proj = make_proj(tmpdir, {"mkdocs.yaml": "site_name: X\n"}) + from projspec.proj.documentation import MkDocs + + assert raw_spec(MkDocs, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.documentation import MkDocs + + assert not raw_spec(MkDocs, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["site_name"] == "My Project Docs" + assert meta["site_author"] == "Alice" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + assert "docs" in spec._artifacts + assert "serve" in spec._artifacts + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["docs"], StaticSite) + assert isinstance(spec._artifacts["serve"], Server) + + def test_parse_output_path(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + assert "site" in spec._artifacts["docs"].fn + + def test_parse_custom_site_dir(self, tmpdir): + proj = make_proj(tmpdir, {"mkdocs.yml": "site_name: X\nsite_dir: public\n"}) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + assert "public" in spec._artifacts["docs"].fn + + +class TestSphinx: + FILES_ROOT = { + "conf.py": """\ + project = "MyLib" + author = "Bob" + release = "1.2.3" + extensions = [] 
+ html_theme = "alabaster" + """, + "index.rst": ".. toctree::\n intro\n", + } + + FILES_DOCS_DIR = { + "docs/conf.py": """\ + project = "MyLib" + author = "Carol" + release = "0.1" + extensions = [] + html_theme = "furo" + """, + "docs/index.rst": "Content\n", + } + + def test_match_root_conf(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROOT) + from projspec.proj.documentation import Sphinx + + assert raw_spec(Sphinx, proj).match() + + def test_match_docs_conf(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_DOCS_DIR) + from projspec.proj.documentation import Sphinx + + assert raw_spec(Sphinx, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.documentation import Sphinx + + assert not raw_spec(Sphinx, proj).match() + + def test_parse_metadata_root(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROOT) + from projspec.proj.documentation import Sphinx + + spec = raw_spec(Sphinx, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["project"] == "MyLib" + assert meta["author"] == "Bob" + assert meta["release"] == "1.2.3" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROOT) + from projspec.proj.documentation import Sphinx + + spec = raw_spec(Sphinx, proj) + spec.parse() + assert "docs" in spec._artifacts + assert "autobuild" in spec._artifacts + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["docs"], StaticSite) + assert isinstance(spec._artifacts["autobuild"], Server) + + def test_parse_docs_dir_layout(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_DOCS_DIR) + from projspec.proj.documentation import Sphinx + + spec = raw_spec(Sphinx, proj) + spec.parse() + assert "docs" in spec._artifacts["docs"].fn + + +# --------------------------------------------------------------------------- +# Infrastructure specs +# 
--------------------------------------------------------------------------- + + +class TestDockerCompose: + FILES = { + "docker-compose.yml": """\ + name: myapp + services: + web: + image: nginx:latest + ports: + - "8080:80" + db: + image: postgres:15 + environment: + POSTGRES_PASSWORD: secret + cache: + image: redis:7 + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + assert raw_spec(DockerCompose, proj).match() + + def test_match_compose_yaml(self, tmpdir): + proj = make_proj( + tmpdir, {"compose.yaml": "services:\n app:\n image: alpine\n"} + ) + from projspec.proj.infra import DockerCompose + + assert raw_spec(DockerCompose, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import DockerCompose + + assert not raw_spec(DockerCompose, proj).match() + + def test_parse_services_as_dependencies(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + deps = spec._contents["service_dependency"] + assert "web" in deps + assert "db" in deps + assert "cache" in deps + + def test_parse_service_details(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + db = spec._contents["service_dependency"]["db"] + assert db.image == "postgres:15" + assert db.service_type == "postgres" + assert db.version == "15" + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["name"] == "myapp" + assert "web" in meta["services"] + + def test_parse_stack_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from 
projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + from projspec.artifact.infra import ComposeStack + + assert isinstance(spec._artifacts["stack"], ComposeStack) + assert "docker-compose.yml" in spec._artifacts["stack"].compose_file + + +class TestTerraform: + FILES = { + "main.tf": """\ + terraform { + required_version = ">= 1.5" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + } + + resource "aws_s3_bucket" "data" { + bucket = "my-data-bucket" + } + + resource "aws_lambda_function" "handler" { + function_name = "my-handler" + role = "arn:aws:iam::123:role/role" + handler = "index.handler" + runtime = "python3.11" + } + """, + "variables.tf": 'variable "region" {\n default = "us-east-1"\n}\n', + "outputs.tf": "", + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + assert raw_spec(Terraform, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Terraform + + assert not raw_spec(Terraform, proj).match() + + def test_parse_resource_types(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + spec = raw_spec(Terraform, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert "aws_s3_bucket" in meta["resource_types"] + assert "aws_lambda_function" in meta["resource_types"] + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + spec = raw_spec(Terraform, proj) + spec.parse() + for cmd in ("init", "validate", "apply", "destroy"): + assert cmd in spec._contents["command"] + assert cmd in spec._artifacts + + def test_parse_plan_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + spec = raw_spec(Terraform, proj) + spec.parse() + from 
projspec.artifact.infra import TerraformPlan + + assert isinstance(spec._artifacts["plan"], TerraformPlan) + assert "plan.tfplan" in spec._artifacts["plan"].fn + + +class TestAnsible: + FILES_PLAYBOOK = { + "playbook.yml": """\ + --- + - name: Configure webservers + hosts: webservers + tasks: + - name: Install nginx + apt: + name: nginx + state: present + """, + "inventory": "webserver1 ansible_host=192.168.1.1\n", + } + + FILES_ROLES = { + "site.yml": "---\n- hosts: all\n roles:\n - common\n", + "roles/common/tasks/main.yml": "---\n- name: update\n apt: update_cache=yes\n", + } + + def test_match_playbook(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PLAYBOOK) + from projspec.proj.infra import Ansible + + assert raw_spec(Ansible, proj).match() + + def test_match_ansible_cfg(self, tmpdir): + proj = make_proj(tmpdir, {"ansible.cfg": "[defaults]\ninventory = inventory\n"}) + from projspec.proj.infra import Ansible + + assert raw_spec(Ansible, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Ansible + + assert not raw_spec(Ansible, proj).match() + + def test_parse_playbook_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PLAYBOOK) + from projspec.proj.infra import Ansible + + spec = raw_spec(Ansible, proj) + spec.parse() + assert "playbook" in spec._contents["command"] + assert spec._contents["command"]["playbook"].cmd == [ + "ansible-playbook", + "playbook.yml", + ] + + def test_parse_site_yml(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROLES) + from projspec.proj.infra import Ansible + + spec = raw_spec(Ansible, proj) + spec.parse() + assert "site" in spec._contents["command"] + + +class TestPulumi: + FILES = { + "Pulumi.yaml": """\ + name: my-infra + runtime: python + description: Cloud infrastructure for my-infra + """, + } + + FILES_DICT_RUNTIME = { + "Pulumi.yaml": """\ + name: my-infra + runtime: + name: python + options: + virtualenv: venv + description: Uses 
dict runtime + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + assert raw_spec(Pulumi, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Pulumi + + assert not raw_spec(Pulumi, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["name"] == "my-infra" + assert meta["runtime"] == "python" + + def test_parse_metadata_dict_runtime(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_DICT_RUNTIME) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["runtime"] == "python" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + from projspec.artifact.deployment import Deployment + + assert isinstance(spec._artifacts["deploy"], Deployment) + assert "preview" in spec._artifacts + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + assert "up" in spec._contents["command"] + assert "destroy" in spec._contents["command"] + assert spec._contents["command"]["up"].cmd == ["pulumi", "up", "--yes"] + + +class TestCDK: + FILES = { + "cdk.json": json.dumps( + { + "app": "npx ts-node --prefer-ts-exts bin/app.ts", + "context": {"@aws-cdk/core:enableStackNameDuplicates": True}, + } + ), + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + assert raw_spec(CDK, proj).match() + + def test_match_negative(self, tmpdir): + proj 
= make_proj(tmpdir, {}) + from projspec.proj.infra import CDK + + assert not raw_spec(CDK, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + spec = raw_spec(CDK, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert "ts-node" in meta["app"] + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + spec = raw_spec(CDK, proj) + spec.parse() + for cmd in ("synth", "deploy", "destroy", "diff"): + assert cmd in spec._contents["command"] + + def test_parse_deploy_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + spec = raw_spec(CDK, proj) + spec.parse() + from projspec.artifact.deployment import Deployment + + assert isinstance(spec._artifacts["deploy"], Deployment) + + +class TestEarthfile: + FILES = { + "Earthfile": """\ + VERSION 0.8 + + build: + FROM golang:1.21 + RUN go build ./... + + test: + FROM +build + RUN go test ./... + + docker: + FROM alpine:latest + COPY +build/app /app + ENTRYPOINT ["/app"] + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Earthfile + + assert raw_spec(Earthfile, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Earthfile + + assert not raw_spec(Earthfile, proj).match() + + def test_parse_targets(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Earthfile + + spec = raw_spec(Earthfile, proj) + spec.parse() + assert "build" in spec._contents["command"] + assert "test" in spec._contents["command"] + assert "docker" in spec._contents["command"] + assert spec._contents["command"]["build"].cmd == ["earthly", "+build"] + + def test_parse_uppercase_directives_filtered(self, tmpdir): + # VERSION, FROM, RUN etc. 
should not appear as targets + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Earthfile + + spec = raw_spec(Earthfile, proj) + spec.parse() + for key in spec._contents.get("command", {}): + assert ( + key == key.lower() or not key.isupper() + ), f"All-caps directive '{key}' should be filtered out" + + +class TestNixpacks: + FILES = { + "nixpacks.toml": """\ + [phases.setup] + nixPkgs = ['python311', 'poetry'] + + [phases.install] + cmds = ['poetry install --no-dev'] + + [start] + cmd = 'uvicorn app:app --host 0.0.0.0' + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Nixpacks + + assert raw_spec(Nixpacks, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Nixpacks + + assert not raw_spec(Nixpacks, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Nixpacks + + spec = raw_spec(Nixpacks, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert "setup" in meta["phases"] + assert "install" in meta["phases"] + assert "uvicorn" in meta["start_cmd"] + + def test_parse_docker_image_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Nixpacks + + spec = raw_spec(Nixpacks, proj) + spec.parse() + from projspec.artifact.process import Process + + assert isinstance(spec._artifacts["build"], Process) + assert "nixpacks" in spec._artifacts["build"].cmd[0] + + +class TestVagrant: + FILES = { + "Vagrantfile": """\ + Vagrant.configure("2") do |config| + config.vm.box = "ubuntu/jammy64" + config.vm.hostname = "dev-server" + config.vm.provider "virtualbox" do |vb| + vb.memory = "2048" + end + end + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + assert raw_spec(Vagrant, 
proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Vagrant + + assert not raw_spec(Vagrant, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + spec = raw_spec(Vagrant, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["box"] == "ubuntu/jammy64" + assert meta["hostname"] == "dev-server" + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + spec = raw_spec(Vagrant, proj) + spec.parse() + for cmd in ("up", "halt", "destroy", "ssh"): + assert cmd in spec._contents["command"] + assert spec._contents["command"]["up"].cmd == ["vagrant", "up"] + + def test_parse_vm_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + spec = raw_spec(Vagrant, proj) + spec.parse() + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["vm"], Server) + + +# --------------------------------------------------------------------------- +# Web framework specs (scan-based, no _create) +# --------------------------------------------------------------------------- + + +class TestGradio: + GRADIO_APP = """\ + import gradio as gr + + def predict(text): + return text.upper() + + demo = gr.Interface(fn=predict, inputs="text", outputs="text") + + if __name__ == "__main__": + demo.launch() + """ + + GRADIO_BLOCKS = """\ + import gradio as gr + + with gr.Blocks() as demo: + gr.Markdown("Hello!") + + demo.launch() + """ + + def test_match_positive(self, tmpdir): + write_files(tmpdir, {"app.py": self.GRADIO_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + assert raw_spec(Gradio, proj).match() + + def test_match_negative(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio 
+ + assert not raw_spec(Gradio, proj).match() + + def test_parse_interface(self, tmpdir): + write_files(tmpdir, {"app.py": self.GRADIO_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + spec.parse() + assert "server" in spec._artifacts + assert "app" in spec._artifacts["server"] + + def test_parse_blocks(self, tmpdir): + write_files(tmpdir, {"demo.py": self.GRADIO_BLOCKS}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + spec.parse() + assert "demo" in spec._artifacts["server"] + + def test_parse_command_uses_python(self, tmpdir): + write_files(tmpdir, {"app.py": self.GRADIO_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + spec.parse() + cmd = spec._artifacts["server"]["app"].cmd + assert cmd[0] == "python" + + def test_parse_non_gradio_ignored(self, tmpdir): + write_files(tmpdir, {"app.py": "import flask\napp = Flask(__name__)\n"}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + from projspec.proj.base import ParseFailed + + with pytest.raises(ParseFailed): + spec.parse() + + +class TestShiny: + SHINY_APP = """\ + from shiny import App, render, ui + + app_ui = ui.page_fluid( + ui.input_text("name", "Name:"), + ui.output_text_verbatim("greeting"), + ) + + def server(input, output, session): + @render.text + def greeting(): + return f"Hello, {input.name()}!" 
+ + app = App(app_ui, server) + """ + + def test_match_positive(self, tmpdir): + write_files(tmpdir, {"app.py": self.SHINY_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + assert raw_spec(Shiny, proj).match() + + def test_match_negative(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + assert not raw_spec(Shiny, proj).match() + + def test_parse_server(self, tmpdir): + write_files(tmpdir, {"app.py": self.SHINY_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + spec = raw_spec(Shiny, proj) + spec.parse() + assert "server" in spec._artifacts + assert "app" in spec._artifacts["server"] + cmd = spec._artifacts["server"]["app"].cmd + assert cmd[0] == "shiny" + assert "run" in cmd + + def test_parse_non_shiny_ignored(self, tmpdir): + write_files(tmpdir, {"app.py": "import streamlit as st\nst.write('hello')\n"}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + spec = raw_spec(Shiny, proj) + from projspec.proj.base import ParseFailed + + with pytest.raises(ParseFailed): + spec.parse() + + +# --------------------------------------------------------------------------- +# New content types +# --------------------------------------------------------------------------- + + +class TestCIWorkflow: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import CIWorkflow + + wf = CIWorkflow( + proj=proj, + name="My Workflow", + triggers=["push", "pull_request"], + jobs=["build", "test"], + provider="github", + ) + assert wf.name == "My Workflow" + assert "push" in wf.triggers + assert "build" in wf.jobs + assert wf.provider == "github" + + def test_to_dict(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import CIWorkflow + + wf = CIWorkflow( + proj=proj, + name="CI", + triggers=["push"], + jobs=["test"], + provider="github", + ) + d = 
wf.to_dict(compact=True) + assert d["name"] == "CI" + assert d["provider"] == "github" + + def test_registered(self): + from projspec.content.base import registry + + assert "c_i_workflow" in registry + + +class TestPipelineStage: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import PipelineStage + + stage = PipelineStage( + proj=proj, + name="process", + cmd=["snakemake", "process"], + depends_on=["download"], + ) + assert stage.name == "process" + assert stage.cmd == ["snakemake", "process"] + assert "download" in stage.depends_on + + def test_registered(self): + from projspec.content.base import registry + + assert "pipeline_stage" in registry + + +class TestServiceDependency: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import ServiceDependency + + svc = ServiceDependency( + proj=proj, + name="db", + service_type="postgres", + version="15", + image="postgres:15", + ) + assert svc.name == "db" + assert svc.service_type == "postgres" + assert svc.version == "15" + + def test_registered(self): + from projspec.content.base import registry + + assert "service_dependency" in registry + + +# --------------------------------------------------------------------------- +# New artifact types +# --------------------------------------------------------------------------- + + +class TestComposeStack: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import ComposeStack + + stack = ComposeStack(proj=proj, file="docker-compose.yml") + assert stack.compose_file == "docker-compose.yml" + assert "docker" in stack.cmd[0] + assert "compose" in stack.cmd + + def test_registered(self): + from projspec.artifact.base import registry + + assert "compose_stack" in registry + + def test_state_unknown_remote(self, tmpdir): + from projspec.artifact.infra import ComposeStack + from unittest.mock import MagicMock + + # Simulate a 
remote (non-LocalFileSystem) project + proj = projspec.Project.__new__(projspec.Project) + mock_fs = MagicMock() + mock_fs.__class__.__name__ = "S3FileSystem" + # is_local() uses isinstance check against LocalFileSystem + import fsspec.implementations.local + + mock_fs.__class__ = type("S3FileSystem", (), {}) + proj.fs = mock_fs + proj.url = "bucket/prefix" + stack = ComposeStack(proj=proj) + # Remote project: state should be "" (unknown) + assert stack.state == "" + + +class TestStaticSite: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import StaticSite + + site = StaticSite( + proj=proj, cmd=["mkdocs", "build"], fn="/path/site/index.html" + ) + assert site.fn == "/path/site/index.html" + assert site.cmd == ["mkdocs", "build"] + + def test_registered(self): + from projspec.artifact.base import registry + + assert "static_site" in registry + + def test_is_done_when_file_exists(self, tmpdir): + path = str(tmpdir) + os.makedirs(os.path.join(path, "site")) + index = os.path.join(path, "site", "index.html") + open(index, "w").close() + proj = projspec.Project(path) + from projspec.artifact.infra import StaticSite + + site = StaticSite(proj=proj, cmd=["mkdocs", "build"], fn=index) + assert site._is_done() + + def test_is_clean_when_file_absent(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import StaticSite + + site = StaticSite( + proj=proj, + cmd=["mkdocs", "build"], + fn=str(tmpdir.join("site/index.html")), + ) + assert site._is_clean() + + +class TestTerraformPlan: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import TerraformPlan + + plan = TerraformPlan(proj=proj) + assert "plan.tfplan" in plan.fn + assert plan.cmd == ["terraform", "plan", "-out", "plan.tfplan"] + + def test_custom_plan_file(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import TerraformPlan + + plan = 
TerraformPlan(proj=proj, plan_file="infra.tfplan") + assert "infra.tfplan" in plan.fn + assert "infra.tfplan" in plan.cmd + + def test_registered(self): + from projspec.artifact.base import registry + + assert "terraform_plan" in registry + + +# --------------------------------------------------------------------------- +# Metaflow +# --------------------------------------------------------------------------- + +HELLO_FLOW = """\ +from metaflow import FlowSpec, step + + +class HelloFlow(FlowSpec): + @step + def start(self): + print("Hello!") + self.next(self.end) + + @step + def end(self): + print("Done.") + + +if __name__ == "__main__": + HelloFlow() +""" + +TRAIN_FLOW = """\ +from metaflow import FlowSpec, step, project, schedule, Parameter + + +@project(name="my_ml_project") +@schedule(daily=True) +class TrainFlow(FlowSpec): + learning_rate = Parameter("lr", default=0.01) + + @step + def start(self): + self.next(self.train) + + @step + def train(self): + print(f"Training with lr={self.learning_rate}") + self.next(self.end) + + @step + def end(self): + print("Training complete") + + +if __name__ == "__main__": + TrainFlow() +""" + +NOT_METAFLOW = """\ +import pandas as pd + +def process(): + return pd.DataFrame({"a": [1, 2, 3]}) +""" + + +class TestMetaflow: + def test_match_positive(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert raw_spec(Metaflow, proj).match() + + def test_match_negative_no_py(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert not raw_spec(Metaflow, proj).match() + + def test_match_negative_non_metaflow_py(self, tmpdir): + write_files(tmpdir, {"script.py": NOT_METAFLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert not raw_spec(Metaflow, proj).match() + + def test_match_requires_both_import_and_flowspec(self, 
tmpdir): + # import present but no FlowSpec subclass + write_files( + tmpdir, + {"util.py": "from metaflow import Parameter\nx = Parameter('n')\n"}, + ) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert not raw_spec(Metaflow, proj).match() + + def test_parse_run_command(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + assert "flow" in spec._contents["command"] + assert spec._contents["command"]["flow"].cmd == [ + "python", + "flow.py", + "run", + ] + + def test_parse_process_artifact(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + assert "flow" in spec._artifacts["process"] + assert spec._artifacts["process"]["flow"].cmd == [ + "python", + "flow.py", + "run", + ] + + def test_parse_step_names_as_pipeline_stages(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + stages = spec._contents.get("pipeline_stage", {}) + assert "flow.start" in stages + assert "flow.end" in stages + + def test_parse_project_name_from_decorator(self, tmpdir): + write_files(tmpdir, {"train.py": TRAIN_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["project"] == "my_ml_project" + + def test_parse_deployment_artifacts_when_scheduled(self, tmpdir): + write_files(tmpdir, {"train.py": TRAIN_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + 
spec.parse() + procs = spec._artifacts["process"] + assert "train.argo_create" in procs + assert "train.step_functions_create" in procs + assert procs["train.argo_create"].cmd == [ + "python", + "train.py", + "argo-workflows", + "create", + ] + assert procs["train.step_functions_create"].cmd == [ + "python", + "train.py", + "step-functions", + "create", + ] + + def test_parse_no_deployment_artifacts_without_schedule(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + procs = spec._artifacts["process"] + assert not any("argo" in k or "step_functions" in k for k in procs) + + def test_parse_multiple_flows(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW, "train.py": TRAIN_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + assert "flow" in spec._contents["command"] + assert "train" in spec._contents["command"] + + def test_create_writes_flow_file(self, tmpdir): + path = str(tmpdir) + from projspec.proj.dataworkflows import Metaflow + + Metaflow._create(path) + flow_file = os.path.join(path, "flow.py") + assert os.path.exists(flow_file) + content = open(flow_file).read() + assert "FlowSpec" in content + assert "@step" in content + assert "def start" in content + assert "def end" in content + assert "if __name__" in content + + def test_create_flow_class_name_derived_from_dir(self, tmpdir): + path = str(tmpdir.mkdir("my_pipeline")) + from projspec.proj.dataworkflows import Metaflow + + Metaflow._create(path) + content = open(os.path.join(path, "flow.py")).read() + assert "MyPipelineFlow" in content + + def test_roundtrip_create_and_detect(self, tmpdir): + """create() produces files that match() and parse() accept.""" + path = str(tmpdir) + proj = projspec.Project(path) + proj.create("Metaflow") + # Re-scan so 
scanned_files picks up the new flow.py + proj2 = projspec.Project(path) + assert "metaflow" in proj2 diff --git a/tests/test_tools.py b/tests/test_tools.py new file mode 100644 index 0000000..2bf1123 --- /dev/null +++ b/tests/test_tools.py @@ -0,0 +1,356 @@ +"""Tests for projspec.tools.choose_install_method and install_tool.""" + +import subprocess +import sys +from contextlib import contextmanager +from unittest.mock import patch, MagicMock + +import pytest + +import projspec.tools as tools +from projspec.tools import ( + ToolInfo, + TOOLS, + _is_url, + _is_shell_string, + _leading_executable, + _method_is_viable, + _rank_install_string, + _preferred_install_methods, + choose_install_method, + install_tool, +) +from projspec.config import temp_conf + + +# --------------------------------------------------------------------------- +# Helpers for patching is_installed +# --------------------------------------------------------------------------- + + +@contextmanager +def installed(*executables: str): + """Context manager: make is_installed report only *executables* as present.""" + exe_set = set(executables) + with patch.object( + tools.is_installed, "exists", side_effect=lambda x, **kw: x in exe_set + ): + yield + + +@contextmanager +def nothing_installed(): + """Context manager: make is_installed report nothing as present.""" + with patch.object(tools.is_installed, "exists", return_value=False): + yield + + +# --------------------------------------------------------------------------- +# Classification helpers +# --------------------------------------------------------------------------- + + +class TestIsUrl: + def test_https(self): + assert _is_url("https://example.com/install.sh") + + def test_http(self): + assert _is_url("http://example.com") + + def test_pip_not_url(self): + assert not _is_url("pip install foo") + + def test_curl_not_url(self): + assert not _is_url("curl -sSL https://example.com | sh") + + +class TestIsShellString: + def test_pipe(self): + 
assert _is_shell_string("curl -sSL https://x.sh | sh") + + def test_redirect(self): + assert _is_shell_string("echo y > /tmp/x") + + def test_and_and(self): + assert _is_shell_string("cd /tmp && ./install.sh") + + def test_plain_pip(self): + assert not _is_shell_string("pip install uv") + + def test_plain_brew(self): + assert not _is_shell_string("brew install uv") + + +class TestLeadingExecutable: + def test_pip(self): + assert _leading_executable("pip install foo") == "pip" + + def test_curl(self): + assert _leading_executable("curl -sSL https://example.com | sh") == "curl" + + def test_empty(self): + assert _leading_executable("") == "" + + +# --------------------------------------------------------------------------- +# _method_is_viable +# --------------------------------------------------------------------------- + + +class TestMethodIsViable: + def test_url_never_viable(self): + assert not _method_is_viable("https://example.com/install") + + def test_winget_only_on_windows(self): + with patch.object(tools, "_IS_POSIX", True): + assert not _method_is_viable("winget install --id=foo.Bar") + with patch.object(tools, "_IS_POSIX", False): + with installed("winget"): + assert _method_is_viable("winget install --id=foo.Bar") + + def test_shell_string_requires_posix(self): + with patch.object(tools, "_IS_POSIX", False): + with installed("curl"): + assert not _method_is_viable("curl -sSL https://x.sh | sh") + + def test_shell_string_requires_leading_executable_present(self): + with patch.object(tools, "_IS_POSIX", True): + with nothing_installed(): + assert not _method_is_viable("curl -sSL https://x.sh | sh") + with installed("curl"): + assert _method_is_viable("curl -sSL https://x.sh | sh") + + def test_plain_command_needs_executable_on_path(self): + with nothing_installed(): + assert not _method_is_viable("pip install foo") + with installed("pip"): + assert _method_is_viable("pip install foo") + + def test_brew_needs_brew_present(self): + with nothing_installed(): 
+ assert not _method_is_viable("brew install foo") + with installed("brew"): + assert _method_is_viable("brew install foo") + + +# --------------------------------------------------------------------------- +# _rank_install_string +# --------------------------------------------------------------------------- + + +class TestRankInstallString: + def test_early_preference_ranks_lower(self): + prefs = ["uv", "conda", "pip"] + assert _rank_install_string("uv add foo", prefs) < _rank_install_string( + "pip install foo", prefs + ) + assert _rank_install_string("conda install foo", prefs) < _rank_install_string( + "pip install foo", prefs + ) + + def test_unknown_executable_ranks_last(self): + prefs = ["uv", "pip"] + rank_unknown = _rank_install_string("obscure-tool install foo", prefs) + assert rank_unknown == len(prefs) + + def test_same_installer_same_rank(self): + prefs = ["pip", "conda"] + r1 = _rank_install_string("pip install foo", prefs) + r2 = _rank_install_string("pip install bar --extra-index-url x", prefs) + assert r1 == r2 + + +# --------------------------------------------------------------------------- +# _preferred_install_methods +# --------------------------------------------------------------------------- + + +class TestPreferredInstallMethods: + def test_returns_list(self): + result = _preferred_install_methods() + assert isinstance(result, list) + assert len(result) > 0 + + def test_config_override(self): + with temp_conf(preferred_install_methods=["conda", "pip"]): + result = _preferred_install_methods() + assert result[:2] == ["conda", "pip"] + + def test_empty_config_uses_defaults(self): + with temp_conf(preferred_install_methods=[]): + result = _preferred_install_methods() + assert "pip" in result + assert "uv" in result + + def test_posix_excludes_winget_by_default(self): + with patch.object(tools, "_IS_POSIX", True): + result = _preferred_install_methods() + assert "winget" not in result + + def test_windows_includes_winget_by_default(self): + 
with patch.object(tools, "_IS_POSIX", False): + result = _preferred_install_methods() + assert "winget" in result + + +# --------------------------------------------------------------------------- +# choose_install_method +# --------------------------------------------------------------------------- + + +class TestChooseInstallMethod: + def test_unknown_tool_returns_none(self): + assert choose_install_method("nonexistent-tool-xyz") is None + + def test_returns_string_for_known_tool_with_viable_method(self): + with installed("pip"): + result = choose_install_method("uv") + assert result is not None + assert isinstance(result, str) + + def test_prefers_configured_installer_when_present(self): + # mlflow has "uv add mlflow" — with uv on PATH and uv preferred, + # that suggestion should be chosen over pip + with temp_conf(preferred_install_methods=["uv", "pip"]): + with installed("uv", "pip"): + result = choose_install_method("mlflow") + assert result is not None + assert result.startswith("uv") + + def test_falls_back_to_pip_when_uv_absent(self): + with temp_conf(preferred_install_methods=["uv", "pip"]): + with installed("pip"): + result = choose_install_method("uv") + assert result is not None + assert result.startswith("pip") + + def test_shell_string_chosen_when_only_curl_available(self): + """When only curl is on PATH, a curl|sh one-liner should be chosen.""" + info = ToolInfo( + name="test-shell-tool", + description="Test tool", + install_suggestions=[ + "pip install test-shell-tool", + "curl -sSL https://example.com/install.sh | sh", + ], + ) + with patch.dict(tools.TOOLS, {"test-shell-tool": info}): + with temp_conf(preferred_install_methods=["pip", "curl"]): + with ( + installed("curl"), + patch.object(tools, "_IS_POSIX", True), + ): + result = choose_install_method("test-shell-tool") + assert result is not None + assert "curl" in result + assert "|" in result + + def test_url_never_chosen(self): + info = ToolInfo( + name="url-only-tool", + description="Only 
has a URL install",
+            install_suggestions=["https://example.com/install"],
+        )
+        with patch.dict(tools.TOOLS, {"url-only-tool": info}):
+            result = choose_install_method("url-only-tool")
+            assert result is None
+
+    def test_preference_order_respected(self):
+        info = ToolInfo(
+            name="multi-method-tool",
+            description="Has several install methods",
+            install_suggestions=[
+                "pip install multi-method-tool",
+                "conda install -c conda-forge multi-method-tool",
+                "brew install multi-method-tool",
+            ],
+        )
+        with patch.dict(tools.TOOLS, {"multi-method-tool": info}):
+            with temp_conf(preferred_install_methods=["conda", "pip", "brew"]):
+                with installed("pip", "conda", "brew"):
+                    result = choose_install_method("multi-method-tool")
+                    assert result is not None
+                    assert result.startswith("conda")
+
+    def test_winget_not_chosen_on_posix(self):
+        info = ToolInfo(
+            name="win-tool",
+            description="Windows-only tool",
+            install_suggestions=["winget install --id=foo.Bar"],
+        )
+        with patch.dict(tools.TOOLS, {"win-tool": info}):
+            with patch.object(tools, "_IS_POSIX", True):
+                result = choose_install_method("win-tool")
+                assert result is None
+
+
+# ---------------------------------------------------------------------------
+# install_tool
+# ---------------------------------------------------------------------------
+
+
+class TestInstallTool:
+    def test_raises_for_unknown_tool(self):
+        with pytest.raises(ValueError, match="Unknown tool"):
+            install_tool("nonexistent-tool-xyz")
+
+    def test_raises_when_no_viable_method(self):
+        with (
+            nothing_installed(),
+            patch.object(tools, "_IS_POSIX", False),
+        ):
+            with pytest.raises(RuntimeError, match="No viable install method"):
+                install_tool("uv")
+
+    def test_plain_command_uses_subprocess_call_with_list(self):
+        """Non-shell install strings are called as a list (no shell=True)."""
+        with (
+            installed("pip"),
+            patch("subprocess.call", return_value=0) as mock_call,
+        ):
+            with temp_conf(preferred_install_methods=["pip"]):
+ rc = install_tool("uv") + assert rc == 0 + mock_call.assert_called_once() + call_args, call_kwargs = mock_call.call_args + assert isinstance(call_args[0], list) + assert call_kwargs.get("shell") is not True + + def test_shell_string_uses_shell_true(self): + """Shell one-liners are run with shell=True.""" + info = ToolInfo( + name="shell-install-tool", + description="Installed via curl pipe", + install_suggestions=["curl -sSL https://example.com/install.sh | sh"], + ) + with patch.dict(tools.TOOLS, {"shell-install-tool": info}): + with ( + installed("curl"), + patch.object(tools, "_IS_POSIX", True), + patch("subprocess.call", return_value=0) as mock_call, + temp_conf(preferred_install_methods=["curl"]), + ): + rc = install_tool("shell-install-tool") + assert rc == 0 + mock_call.assert_called_once() + call_args, call_kwargs = mock_call.call_args + assert call_kwargs.get("shell") is True + assert isinstance(call_args[0], str) + + def test_returns_exit_code(self): + with ( + installed("pip"), + patch("subprocess.call", return_value=42), + temp_conf(preferred_install_methods=["pip"]), + ): + rc = install_tool("uv") + assert rc == 42 + + def test_non_zero_exit_code_is_propagated(self): + with ( + installed("pip"), + patch("subprocess.call", return_value=1), + temp_conf(preferred_install_methods=["pip"]), + ): + rc = install_tool("uv") + assert rc == 1