diff --git a/docs/source/api.rst b/docs/source/api.rst index 026c763..0555d43 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -35,13 +35,32 @@ User Classes content.metadata.Licensed proj.ai.AIEnabled proj.briefcase.Briefcase + proj.cicd.GitHubActions + proj.cicd.GitLabCI + proj.cicd.CircleCI + proj.cicd.Taskfile + proj.cicd.JustFile + proj.cicd.Tox proj.conda_package.CondaRecipe proj.conda_package.RattlerRecipe proj.conda_project.CondaProject proj.datapackage.DVCRepo proj.datapackage.DataPackage + proj.dataworkflows.Dbt + proj.dataworkflows.Quarto + proj.dataworkflows.Prefect + proj.dataworkflows.Dagster + proj.dataworkflows.Kedro + proj.dataworkflows.Metaflow + proj.dataworkflows.MLFlow + proj.dataworkflows.Airflow + proj.dataworkflows.Snakemake + proj.dataworkflows.Nox + proj.documentation.Docusaurus proj.documentation.MDBook + proj.documentation.MkDocs proj.documentation.RTD + proj.documentation.Sphinx proj.git.GitRepo proj.golang.Golang proj.helm.HelmChart @@ -51,6 +70,21 @@ User Classes proj.ide.NvidiaAIWorkbench proj.ide.VSCode proj.ide.Zed + proj.infra.DockerCompose + proj.infra.Terraform + proj.infra.Ansible + proj.infra.Pulumi + proj.infra.CDK + proj.infra.Earthfile + proj.infra.Nixpacks + proj.infra.Vagrant + proj.jsframeworks.NextJS + proj.jsframeworks.NuxtJS + proj.jsframeworks.SvelteKit + proj.jsframeworks.Vite + proj.jsframeworks.Pnpm + proj.jsframeworks.Bun + proj.jsframeworks.Deno proj.node.JLabExtension proj.node.Node proj.node.Yarn @@ -66,9 +100,12 @@ User Classes proj.uv.Uv proj.uv.UvScript proj.webapp.Django + proj.webapp.Gradio proj.webapp.Marimo + proj.webapp.Panel + proj.webapp.Shiny proj.webapp.Streamlit - proj.workflows.MLFlow + proj.dataworkflows.MLFlow .. autoclass:: projspec.artifact.container.Docker @@ -78,13 +115,31 @@ User Classes .. autoclass:: projspec.content.metadata.Licensed .. autoclass:: projspec.proj.ai.AIEnabled .. autoclass:: projspec.proj.briefcase.Briefcase +.. autoclass:: projspec.proj.cicd.GitHubActions +.. 
autoclass:: projspec.proj.cicd.GitLabCI +.. autoclass:: projspec.proj.cicd.CircleCI +.. autoclass:: projspec.proj.cicd.Taskfile +.. autoclass:: projspec.proj.cicd.JustFile +.. autoclass:: projspec.proj.cicd.Tox .. autoclass:: projspec.proj.conda_package.CondaRecipe .. autoclass:: projspec.proj.conda_package.RattlerRecipe .. autoclass:: projspec.proj.conda_project.CondaProject .. autoclass:: projspec.proj.datapackage.DVCRepo .. autoclass:: projspec.proj.datapackage.DataPackage +.. autoclass:: projspec.proj.dataworkflows.Dbt +.. autoclass:: projspec.proj.dataworkflows.Quarto +.. autoclass:: projspec.proj.dataworkflows.Prefect +.. autoclass:: projspec.proj.dataworkflows.Dagster +.. autoclass:: projspec.proj.dataworkflows.Kedro +.. autoclass:: projspec.proj.dataworkflows.Metaflow +.. autoclass:: projspec.proj.dataworkflows.Airflow +.. autoclass:: projspec.proj.dataworkflows.Snakemake +.. autoclass:: projspec.proj.dataworkflows.Nox +.. autoclass:: projspec.proj.documentation.Docusaurus .. autoclass:: projspec.proj.documentation.MDBook +.. autoclass:: projspec.proj.documentation.MkDocs .. autoclass:: projspec.proj.documentation.RTD +.. autoclass:: projspec.proj.documentation.Sphinx .. autoclass:: projspec.proj.git.GitRepo .. autoclass:: projspec.proj.golang.Golang .. autoclass:: projspec.proj.helm.HelmChart @@ -94,6 +149,21 @@ User Classes .. autoclass:: projspec.proj.ide.NvidiaAIWorkbench .. autoclass:: projspec.proj.ide.VSCode .. autoclass:: projspec.proj.ide.Zed +.. autoclass:: projspec.proj.infra.DockerCompose +.. autoclass:: projspec.proj.infra.Terraform +.. autoclass:: projspec.proj.infra.Ansible +.. autoclass:: projspec.proj.infra.Pulumi +.. autoclass:: projspec.proj.infra.CDK +.. autoclass:: projspec.proj.infra.Earthfile +.. autoclass:: projspec.proj.infra.Nixpacks +.. autoclass:: projspec.proj.infra.Vagrant +.. autoclass:: projspec.proj.jsframeworks.NextJS +.. autoclass:: projspec.proj.jsframeworks.NuxtJS +.. autoclass:: projspec.proj.jsframeworks.SvelteKit +.. 
autoclass:: projspec.proj.jsframeworks.Vite +.. autoclass:: projspec.proj.jsframeworks.Pnpm +.. autoclass:: projspec.proj.jsframeworks.Bun +.. autoclass:: projspec.proj.jsframeworks.Deno .. autoclass:: projspec.proj.node.JLabExtension .. autoclass:: projspec.proj.node.Node .. autoclass:: projspec.proj.node.Yarn @@ -109,9 +179,12 @@ User Classes .. autoclass:: projspec.proj.uv.Uv .. autoclass:: projspec.proj.uv.UvScript .. autoclass:: projspec.proj.webapp.Django +.. autoclass:: projspec.proj.webapp.Gradio .. autoclass:: projspec.proj.webapp.Marimo +.. autoclass:: projspec.proj.webapp.Panel +.. autoclass:: projspec.proj.webapp.Shiny .. autoclass:: projspec.proj.webapp.Streamlit -.. autoclass:: projspec.proj.workflows.MLFlow +.. autoclass:: projspec.proj.dataworkflows.MLFlow Contents @@ -133,6 +206,9 @@ User Classes ~~~~~~~~~~~~ .. autosummary:: + content.cicd.CIWorkflow + content.cicd.PipelineStage + content.cicd.ServiceDependency content.data.IntakeSource content.data.TabularData content.data.DataResource @@ -145,6 +221,9 @@ User Classes content.package.PythonPackage content.package.RustModule +.. autoclass:: projspec.content.cicd.CIWorkflow +.. autoclass:: projspec.content.cicd.PipelineStage +.. autoclass:: projspec.content.cicd.ServiceDependency .. autoclass:: projspec.content.data.IntakeSource .. autoclass:: projspec.content.data.DataResource .. autoclass:: projspec.content.data.TabularData @@ -182,6 +261,9 @@ User Classes artifact.container.DockerRuntime artifact.deployment.Deployment artifact.deployment.HelmDeployment + artifact.infra.ComposeStack + artifact.infra.StaticSite + artifact.infra.TerraformPlan artifact.installable.CondaPackage artifact.installable.SystemInstallablePackage artifact.installable.Wheel @@ -199,6 +281,9 @@ User Classes :members: .. autoclass:: projspec.artifact.deployment.HelmDeployment :members: +.. autoclass:: projspec.artifact.infra.ComposeStack +.. autoclass:: projspec.artifact.infra.StaticSite +.. 
autoclass:: projspec.artifact.infra.TerraformPlan .. autoclass:: projspec.artifact.installable.CondaPackage .. autoclass:: projspec.artifact.installable.SystemInstallablePackage .. autoclass:: projspec.artifact.installable.Wheel diff --git a/src/projspec/artifact/__init__.py b/src/projspec/artifact/__init__.py index 230edb1..c75695f 100644 --- a/src/projspec/artifact/__init__.py +++ b/src/projspec/artifact/__init__.py @@ -3,6 +3,7 @@ from projspec.artifact.base import BaseArtifact, FileArtifact from projspec.artifact.container import DockerImage from projspec.artifact.deployment import Deployment, HelmDeployment +from projspec.artifact.infra import ComposeStack, StaticSite, TerraformPlan from projspec.artifact.installable import CondaPackage, Wheel from projspec.artifact.linter import PreCommit from projspec.artifact.process import Process @@ -11,9 +12,12 @@ __all__ = [ "BaseArtifact", "FileArtifact", + "ComposeStack", "DockerImage", "Deployment", "HelmDeployment", + "StaticSite", + "TerraformPlan", "CondaPackage", "Wheel", "Process", @@ -21,4 +25,5 @@ "CondaEnv", "VirtualEnv", "LockFile", + "PreCommit", ] diff --git a/src/projspec/artifact/infra.py b/src/projspec/artifact/infra.py new file mode 100644 index 0000000..8ffc543 --- /dev/null +++ b/src/projspec/artifact/infra.py @@ -0,0 +1,71 @@ +"""Infrastructure and deployment artifact types.""" + +from projspec.artifact.base import BaseArtifact, FileArtifact +from projspec.proj.base import Project +from projspec.utils import run_subprocess + + +class ComposeStack(BaseArtifact): + """A multi-service stack managed by Docker Compose. + + ``make()`` runs ``docker compose up -d`` + ``clean()`` runs ``docker compose down`` + ``state`` is inferred by ``docker compose ps`` (checks for running services). 
+ """ + + def __init__(self, proj: Project, file: str = "docker-compose.yml", **kwargs): + self.compose_file = file + cmd = ["docker", "compose", "-f", file, "up", "-d"] + super().__init__(proj, cmd=cmd, **kwargs) + + def _make(self, **kwargs): + run_subprocess(self.cmd, cwd=self.proj.url, output=False, **kwargs) + + def clean(self): + run_subprocess( + ["docker", "compose", "-f", self.compose_file, "down"], + cwd=self.proj.url, + output=False, + ) + + def _is_done(self) -> bool: + try: + result = run_subprocess( + ["docker", "compose", "-f", self.compose_file, "ps", "-q"], + cwd=self.proj.url, + ) + return bool(result.stdout.strip()) + except Exception: + return False + + def _is_clean(self) -> bool: + return not self._is_done() + + +class StaticSite(FileArtifact): + """A static website produced by a build tool (MkDocs, Sphinx, Docusaurus, Quarto, etc.). + + ``fn`` should be the glob pattern for the output index file, e.g. + ``/site/index.html``. + """ + + pass + + +class TerraformPlan(FileArtifact): + """A saved Terraform execution plan file (``terraform plan -out plan.tfplan``). + + ``make()`` runs ``terraform plan -out plan.tfplan`` + ``clean()`` deletes the plan file + """ + + def __init__(self, proj: Project, plan_file: str = "plan.tfplan", **kwargs): + fn = f"{proj.url}/{plan_file}" + cmd = ["terraform", "plan", "-out", plan_file] + super().__init__(proj, fn=fn, cmd=cmd, **kwargs) + + def clean(self): + try: + self.proj.fs.rm(self.fn) + except FileNotFoundError: + pass diff --git a/src/projspec/config.py b/src/projspec/config.py index 11cbee2..5d83d70 100644 --- a/src/projspec/config.py +++ b/src/projspec/config.py @@ -20,6 +20,7 @@ def defaults(): "scan_max_size": 5 * 2**10, "remote_artifact_status": False, "capture_artifact_output": True, + "preferred_install_methods": ["conda", "pip"], } @@ -33,6 +34,10 @@ def defaults(): "if True, capture and enqueue output from spawned Process artifacts. " "Otherwise, output appears on stdout/err." 
), + "preferred_install_methods": ( + "ordered list of preferred installer names for install_tool(), " + "e.g. ['uv', 'conda', 'pip']. Empty list uses the platform default." + ), } diff --git a/src/projspec/content/__init__.py b/src/projspec/content/__init__.py index 9fc87c9..6c38261 100644 --- a/src/projspec/content/__init__.py +++ b/src/projspec/content/__init__.py @@ -1,6 +1,12 @@ """Contents classes - information declared in project specs""" from projspec.content.base import BaseContent +from projspec.content.cicd import ( + CIWorkflow, + GithubAction, + PipelineStage, + ServiceDependency, +) from projspec.content.data import TabularData, IntakeSource from projspec.content.env_var import EnvironmentVariables from projspec.content.environment import Environment, Stack, Precision @@ -12,6 +18,10 @@ __all__ = [ "BaseContent", + "CIWorkflow", + "GithubAction", + "PipelineStage", + "ServiceDependency", "TabularData", "IntakeSource", "EnvironmentVariables", diff --git a/src/projspec/content/cicd.py b/src/projspec/content/cicd.py index 789c655..596e808 100644 --- a/src/projspec/content/cicd.py +++ b/src/projspec/content/cicd.py @@ -1,15 +1,45 @@ """Run definitions that are part of code productionalisation""" +from dataclasses import dataclass, field + from projspec.content import BaseContent -class GithubAction(BaseContent): - """A run prescription that runs in github on push/merge""" +@dataclass +class CIWorkflow(BaseContent): + """A CI/CD workflow or pipeline definition. + + Captures the name, triggering events, and high-level job/stage names from + CI configuration files (GitHub Actions, GitLab CI, CircleCI, etc.). + """ + + name: str = "" + triggers: list = field(default_factory=list) + jobs: list = field(default_factory=list) + provider: str = "" # e.g. 
"github", "gitlab", "circleci" + + +# Keep legacy stub under old name for backwards compatibility +GithubAction = CIWorkflow + + +@dataclass +class PipelineStage(BaseContent): + """A named stage or step in a data/ML/workflow pipeline.""" + + name: str = "" + cmd: list = field(default_factory=list) + depends_on: list = field(default_factory=list) + - # TODO: we probably want to extract out the jobs and runs, maybe the steps. - # It may be interesting to provide links to the browser or API to view - # details. - ... +@dataclass +class ServiceDependency(BaseContent): + """An external service that a project depends on at runtime. + Typically exposed via an open TCP port, e.g., as used in container orchestration. + """ -# TODO: there are many of these, but we don't extract much information from them + name: str = "" + service_type: str = "" # e.g. "postgres", "redis", "kafka" + version: str = "" + image: str = "" diff --git a/src/projspec/content/data.py b/src/projspec/content/data.py index 5b46317..2106553 100644 --- a/src/projspec/content/data.py +++ b/src/projspec/content/data.py @@ -92,6 +92,8 @@ def __repr__(self) -> str: def _repr_html_(self) -> str: """Jupyter rich display — returns cached HTML, rendering on first call.""" + # TODO: this is probably not what we want jupyter to dysplay, but it's + # convenient for now. 
if self._html is None: from projspec.content.data_html import repr_html diff --git a/src/projspec/proj/__init__.py b/src/projspec/proj/__init__.py index 929fb17..94560da 100644 --- a/src/projspec/proj/__init__.py +++ b/src/projspec/proj/__init__.py @@ -5,16 +5,55 @@ from projspec.proj.ai import AIEnabled from projspec.proj.backstage import BackstageCatalog from projspec.proj.briefcase import Briefcase +from projspec.proj.cicd import ( + CircleCI, + GitHubActions, + GitLabCI, + JustFile, + Taskfile, + Tox, +) from projspec.proj.conda_package import CondaRecipe, RattlerRecipe from projspec.proj.conda_project import CondaProject from projspec.proj.data_dir import Data from projspec.proj.datapackage import DataPackage, DVCRepo -from projspec.proj.documentation import RTD, MDBook +from projspec.proj.dataworkflows import ( + Airflow, + Dagster, + Dbt, + Kedro, + Metaflow, + MLFlow, + Nox, + Prefect, + Quarto, + Snakemake, +) +from projspec.proj.documentation import RTD, MDBook, MkDocs, Sphinx, Docusaurus from projspec.proj.git import GitRepo from projspec.proj.golang import Golang from projspec.proj.helm import HelmChart from projspec.proj.hf import HuggingFaceRepo from projspec.proj.ide import JetbrainsIDE, NvidiaAIWorkbench, VSCode +from projspec.proj.infra import ( + Ansible, + CDK, + DockerCompose, + Earthfile, + Nixpacks, + Pulumi, + Terraform, + Vagrant, +) +from projspec.proj.jsframeworks import ( + Bun, + Deno, + NextJS, + NuxtJS, + Pnpm, + SvelteKit, + Vite, +) from projspec.proj.node import JLabExtension, Node, Yarn from projspec.proj.pixi import Pixi from projspec.proj.poetry import Poetry @@ -23,44 +62,95 @@ from projspec.proj.python_code import PythonCode, PythonLibrary from projspec.proj.rust import Rust, RustPython from projspec.proj.uv import Uv -from projspec.proj.webapp import Django, Marimo, Streamlit -from projspec.proj.workflows import MLFlow +from projspec.proj.webapp import Django, Gradio, Marimo, Shiny, Streamlit __all__ = [ "ParseFailed", 
"Project", "ProjectSpec", - "AIEnabled", - "BackstageCatalog", - "Briefcase", - "Cited", - "Zenodo", + # CI/CD + "CircleCI", + "GitHubActions", + "GitLabCI", + "JustFile", + "Taskfile", + "Tox", + # Conda "CondaRecipe", "CondaProject", + # Data "Data", - "Golang", + "DataPackage", + "DVCRepo", + # Data/ML workflows + "Airflow", + "Dagster", + "Dbt", + "Kedro", + "Metaflow", + "MLFlow", + "Nox", + "Prefect", + "Quarto", + "Snakemake", + # Documentation + "Docusaurus", + "MkDocs", + "MDBook", + "RTD", + "Sphinx", + # Git "GitRepo", + # Go + "Golang", + # Helm/K8s "HelmChart", + # HuggingFace "HuggingFaceRepo", + # IDE + "AIEnabled", + "BackstageCatalog", + "Briefcase", + "Cited", + "Zenodo", "JetbrainsIDE", - "JLabExtension", - "Marimo", - "MDBook", - "MLFlow", "NvidiaAIWorkbench", + "VSCode", + # Infrastructure + "Ansible", + "CDK", + "DockerCompose", + "Earthfile", + "Nixpacks", + "Pulumi", + "Terraform", + "Vagrant", + # JavaScript frameworks + "Bun", + "Deno", + "NextJS", + "NuxtJS", + "Pnpm", + "SvelteKit", + "Vite", + # Node + "JLabExtension", "Node", - "Poetry", - "RattlerRecipe", + "Yarn", + # Python packaging "Pixi", + "Poetry", "PyScript", "PythonCode", "PythonLibrary", - "RTD", + "Uv", + # Rust "Rust", "RustPython", + # Web apps "Django", + "Gradio", + "Marimo", + "Shiny", "Streamlit", - "Uv", - "VSCode", - "Yarn", ] diff --git a/src/projspec/proj/cicd.py b/src/projspec/proj/cicd.py new file mode 100644 index 0000000..8ddbb49 --- /dev/null +++ b/src/projspec/proj/cicd.py @@ -0,0 +1,430 @@ +"""CI/CD project specs: GitHub Actions, GitLab CI, CircleCI, Taskfile, JustFile, Tox.""" + +import os + +import yaml + +from projspec.proj.base import ParseFailed, ProjectSpec, ProjectExtra +from projspec.utils import AttrDict + + +class GitHubActions(ProjectExtra): + """GitHub Actions CI/CD workflows + + Each YAML file under .github/workflows/ defines one workflow. 
+ """ + + spec_doc = "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions" + + def match(self) -> bool: + # Check for the .github/workflows directory + workflows_dir = f"{self.proj.url}/.github/workflows" + try: + entries = self.proj.fs.ls(workflows_dir, detail=False) + return any(e.endswith((".yml", ".yaml")) for e in entries) + except (FileNotFoundError, NotADirectoryError, Exception): + return False + + def parse(self) -> None: + from projspec.content.cicd import CIWorkflow + + workflows_dir = f"{self.proj.url}/.github/workflows" + try: + entries = self.proj.fs.ls(workflows_dir, detail=False) + except Exception as exc: + raise ParseFailed(f"Could not list .github/workflows: {exc}") from exc + + ci_workflows = AttrDict() + for entry in entries: + if not entry.endswith((".yml", ".yaml")): + continue + try: + with self.proj.fs.open(entry, "rt") as f: + wf = yaml.safe_load(f) + except Exception: + continue + if not isinstance(wf, dict): + continue + + name = wf.get( + "name", + os.path.basename(entry).replace(".yml", "").replace(".yaml", ""), + ) + on = wf.get("on", wf.get(True, {})) # 'on' is a YAML boolean alias + triggers = [] + if isinstance(on, dict): + triggers = list(on.keys()) + elif isinstance(on, list): + triggers = on + elif isinstance(on, str): + triggers = [on] + + jobs = list(wf.get("jobs", {}).keys()) + key = name.lower().replace(" ", "_").replace("-", "_") + ci_workflows[key] = CIWorkflow( + proj=self.proj, + name=name, + triggers=[str(t) for t in triggers], + jobs=jobs, + provider="github", + ) + + if not ci_workflows: + raise ParseFailed("No valid GitHub Actions workflows found") + + self._contents = AttrDict(ci_workflow=ci_workflows) + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal GitHub Actions CI workflow.""" + workflows_dir = os.path.join(path, ".github", "workflows") + os.makedirs(workflows_dir, exist_ok=True) + with open(os.path.join(workflows_dir, 
"ci.yml"), "wt") as f: + f.write( + "name: CI\n" + "\n" + "on:\n" + " push:\n" + " branches: [main]\n" + " pull_request:\n" + " branches: [main]\n" + "\n" + "jobs:\n" + " test:\n" + " runs-on: ubuntu-latest\n" + " steps:\n" + " - uses: actions/checkout@v4\n" + " - name: Run tests\n" + " run: echo 'Add your test command here'\n" + ) + + +class GitLabCI(ProjectExtra): + """GitLab CI/CD pipeline defined in .gitlab-ci.yml.""" + + spec_doc = "https://docs.gitlab.com/ci/yaml/" + + def match(self) -> bool: + return ".gitlab-ci.yml" in self.proj.basenames + + def parse(self) -> None: + from projspec.content.cicd import CIWorkflow + + try: + with self.proj.get_file(".gitlab-ci.yml") as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read .gitlab-ci.yml: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(".gitlab-ci.yml did not parse to a mapping") + + stages = cfg.get("stages", []) + # Jobs are any top-level keys that are not reserved keywords + reserved = { + "stages", + "variables", + "include", + "workflow", + "default", + "image", + "services", + "before_script", + "after_script", + "cache", + } + jobs = [k for k in cfg if k not in reserved and not k.startswith(".")] + + self._contents = AttrDict( + ci_workflow=CIWorkflow( + proj=self.proj, + name="GitLab CI", + triggers=stages, + jobs=jobs, + provider="gitlab", + ) + ) + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal .gitlab-ci.yml.""" + with open(os.path.join(path, ".gitlab-ci.yml"), "wt") as f: + f.write( + "stages:\n" + " - test\n" + "\n" + "test:\n" + " stage: test\n" + " script:\n" + " - echo 'Add your test command here'\n" + ) + + +class CircleCI(ProjectExtra): + """CircleCI pipeline defined in .circleci/config.yml.""" + + spec_doc = "https://circleci.com/docs/configuration-reference/" + + def match(self) -> bool: + config_path = f"{self.proj.url}/.circleci/config.yml" + try: + return 
self.proj.fs.isfile(config_path) + except Exception: + return False + + def parse(self) -> None: + from projspec.content.cicd import CIWorkflow + + config_path = f"{self.proj.url}/.circleci/config.yml" + try: + with self.proj.fs.open(config_path, "rt") as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read .circleci/config.yml: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(".circleci/config.yml did not parse to a mapping") + + jobs = list(cfg.get("jobs", {}).keys()) + workflows = cfg.get("workflows", {}) + workflow_names = [k for k in workflows if k != "version"] + + self._contents = AttrDict( + ci_workflow=CIWorkflow( + proj=self.proj, + name="CircleCI", + triggers=workflow_names, + jobs=jobs, + provider="circleci", + ) + ) + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal CircleCI config.""" + circleci_dir = os.path.join(path, ".circleci") + os.makedirs(circleci_dir, exist_ok=True) + with open(os.path.join(circleci_dir, "config.yml"), "wt") as f: + f.write( + "version: 2.1\n" + "\n" + "jobs:\n" + " test:\n" + " docker:\n" + " - image: cimg/base:stable\n" + " steps:\n" + " - checkout\n" + " - run: echo 'Add your test command here'\n" + "\n" + "workflows:\n" + " main:\n" + " jobs:\n" + " - test\n" + ) + + +class Taskfile(ProjectSpec): + """Task runner using Taskfile (go-task). + + Taskfile.yml (or Taskfile.yaml / taskfile.yml) defines named tasks that + can be run with `task `. 
+ """ + + spec_doc = "https://taskfile.dev/reference/schema/" + + _NAMES = {"Taskfile.yml", "Taskfile.yaml", "taskfile.yml", "taskfile.yaml"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed(f"{fname} did not parse to a mapping") + + tasks = cfg.get("tasks", {}) + cmds = AttrDict() + arts = AttrDict() + for task_name, task_def in tasks.items(): + if not task_name or task_name.startswith("_"): + continue + cmd = ["task", task_name] + cmds[task_name] = Command(proj=self.proj, cmd=cmd) + arts[task_name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) if cmds else AttrDict() + self._artifacts = AttrDict(process=arts) if arts else AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Taskfile.yml.""" + with open(os.path.join(path, "Taskfile.yml"), "wt") as f: + f.write( + "version: '3'\n" + "\n" + "tasks:\n" + " default:\n" + " desc: Default task\n" + " cmds:\n" + " - echo 'Hello from Taskfile!'\n" + "\n" + " test:\n" + " desc: Run tests\n" + " cmds:\n" + " - echo 'Add your test command here'\n" + ) + + +class JustFile(ProjectSpec): + """Task runner using Just (justfile / Justfile). + + A justfile defines named recipes that can be run with `just `. 
+ """ + + spec_doc = "https://just.systems/man/en/" + + _NAMES = {"justfile", "Justfile", ".justfile"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + import re + from projspec.artifact.process import Process + from projspec.content.executable import Command + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + text = f.read() + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + # Recipes are lines matching: recipe-name ...: (not starting with #/@/space) + recipe_names = re.findall( + r"^([a-zA-Z_][a-zA-Z0-9_-]*)(?:\s.*)?:", text, re.MULTILINE + ) + + cmds = AttrDict() + arts = AttrDict() + for name in recipe_names: + cmd = ["just", name] + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) if cmds else AttrDict() + self._artifacts = AttrDict(process=arts) if arts else AttrDict() + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal justfile.""" + with open(os.path.join(path, "justfile"), "wt") as f: + f.write( + "# Default recipe\n" + "default:\n" + " echo 'Hello from just!'\n" + "\n" + "# Run tests\n" + "test:\n" + " echo 'Add your test command here'\n" + ) + + +class Tox(ProjectSpec): + """Python test automation using tox. + + A set of environments and run commands to be run as a workflow. 
+ """ + + spec_doc = "https://tox.wiki/en/stable/config.html" + + def match(self) -> bool: + if "tox.ini" in self.proj.basenames or "tox.toml" in self.proj.basenames: + return True + return bool(self.proj.pyproject.get("tool", {}).get("tox")) + + def parse(self) -> None: + import configparser + import re + from projspec.artifact.process import Process + from projspec.content.executable import Command + + env_names: list[str] = [] + + if "tox.ini" in self.proj.basenames: + try: + with self.proj.get_file("tox.ini") as f: + text = f.read() + cfg = configparser.ConfigParser() + cfg.read_string(text) + # envlist can be a comma/space/newline separated list with optional braces + envlist_raw = cfg.get("tox", "envlist", fallback="") + if envlist_raw: + # Strip braces notation like {py39,py310}-django + flat = re.sub(r"\{[^}]*\}", "", envlist_raw) + env_names = [ + e.strip() for e in re.split(r"[,\s]+", flat) if e.strip() + ] + # Also pick up [testenv:*] sections + for section in cfg.sections(): + if section.startswith("testenv:"): + name = section[len("testenv:") :] + if name not in env_names: + env_names.append(name) + except Exception as exc: + raise ParseFailed(f"Could not parse tox.ini: {exc}") from exc + + elif "tox.toml" in self.proj.basenames: + try: + import toml + from projspec.utils import PickleableTomlDecoder + + with self.proj.get_file("tox.toml", text=False) as f: + cfg = toml.loads(f.read().decode(), decoder=PickleableTomlDecoder()) + env_names = list(cfg.get("env", {}).keys()) + except Exception as exc: + raise ParseFailed(f"Could not parse tox.toml: {exc}") from exc + + else: + tox_cfg = self.proj.pyproject.get("tool", {}).get("tox", {}) + env_names = list(tox_cfg.get("env", {}).keys()) + + cmds = AttrDict() + arts = AttrDict() + if not env_names: + # At minimum expose a generic tox run + cmds["tox"] = Command(proj=self.proj, cmd=["tox"]) + arts["tox"] = Process(proj=self.proj, cmd=["tox"]) + else: + for name in env_names: + cmd = ["tox", "-e", name] + 
cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + self._contents = AttrDict(command=cmds) + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal tox.ini.""" + with open(os.path.join(path, "tox.ini"), "wt") as f: + f.write( + "[tox]\n" + "envlist = py311\n" + "\n" + "[testenv]\n" + "deps = pytest\n" + "commands = pytest {posargs}\n" + ) diff --git a/src/projspec/proj/datapackage.py b/src/projspec/proj/datapackage.py index ffba4ac..2191a16 100644 --- a/src/projspec/proj/datapackage.py +++ b/src/projspec/proj/datapackage.py @@ -4,7 +4,7 @@ class DataPackage(ProjectSpec): - # by frictionless data + """A FrictionlessData datapackage spec""" spec_doc = "https://datapackage.org/standard/data-package/#structure" # e.g., as exported by zenodo diff --git a/src/projspec/proj/dataworkflows.py b/src/projspec/proj/dataworkflows.py new file mode 100644 index 0000000..58d9443 --- /dev/null +++ b/src/projspec/proj/dataworkflows.py @@ -0,0 +1,800 @@ +"""Data/ML workflow specs: dbt, Quarto, Prefect, Dagster, Kedro, Airflow, Snakemake, Nox, Metaflow, MLFlow.""" + +import os +import re + +import yaml + +from projspec.proj.base import ParseFailed, ProjectSpec +from projspec.utils import AttrDict + + +class Dbt(ProjectSpec): + """dbt (data build tool) project. + + dbt is used for data ingestion, validation, and transform. + + The spec dbt about the context of your project and how to transform your data + (build your data sets). 
+ """ + + spec_doc = "https://docs.getdbt.com/reference/dbt_project.yml" + + def match(self) -> bool: + return "dbt_project.yml" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + try: + with self.proj.get_file("dbt_project.yml") as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read dbt_project.yml: {exc}") from exc + + if not isinstance(cfg, dict): + raise ParseFailed("dbt_project.yml did not parse to a mapping") + + meta: dict[str, str] = {} + for key in ("name", "version", "profile"): + if val := cfg.get(key): + meta[key] = str(val) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + # Standard dbt commands + dbt_cmds = { + "run": ["dbt", "run"], + "test": ["dbt", "test"], + "build": ["dbt", "build"], + "compile": ["dbt", "compile"], + "docs_generate": ["dbt", "docs", "generate"], + "docs_serve": ["dbt", "docs", "serve"], + "seed": ["dbt", "seed"], + "snapshot": ["dbt", "snapshot"], + "source_freshness": ["dbt", "source", "freshness"], + } + + cmds = AttrDict() + arts = AttrDict() + for name, cmd in dbt_cmds.items(): + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + conts["command"] = cmds + self._contents = conts + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal dbt project.""" + name = os.path.basename(path) + with open(os.path.join(path, "dbt_project.yml"), "wt") as f: + f.write( + f"name: '{name}'\n" + "version: '1.0.0'\n" + "config-version: 2\n" + "\n" + "profile: 'default'\n" + "\n" + "model-paths: ['models']\n" + "seed-paths: ['seeds']\n" + "test-paths: ['tests']\n" + "snapshot-paths: ['snapshots']\n" + "\n" + "models:\n" + f" {name}:\n" + " +materialized: view\n" + 
class Quarto(ProjectSpec):
    """Quarto publishing system project.

    Detected by a ``_quarto.yml``/``_quarto.yaml`` config file, or by the
    presence of any ``.qmd`` source file.  Parsing pulls descriptive
    metadata from the ``project:`` and ``book:`` sections and exposes a
    static-site render artifact plus a live-preview server.
    """

    spec_doc = "https://quarto.org/docs/reference/projects/core.html"

    def match(self) -> bool:
        """True when a Quarto config file or any ``.qmd`` file is present."""
        if (
            "_quarto.yml" in self.proj.basenames
            or "_quarto.yaml" in self.proj.basenames
        ):
            return True
        return any(n.endswith(".qmd") for n in self.proj.basenames)

    def parse(self) -> None:
        """Populate contents/artifacts from the (optional) Quarto config."""
        from projspec.artifact.infra import StaticSite
        from projspec.artifact.process import Server
        from projspec.content.metadata import DescriptiveMetadata

        # Config is optional: a directory of bare .qmd files still matches.
        cfg: dict = {}
        for fname in ("_quarto.yml", "_quarto.yaml"):
            if fname in self.proj.basenames:
                try:
                    with self.proj.get_file(fname) as f:
                        cfg = yaml.safe_load(f) or {}
                except Exception:
                    pass  # best effort: unreadable config just means no metadata
                break

        meta: dict[str, str] = {}
        # ``or {}`` (not ``.get(key, {})``): an explicitly empty YAML section
        # such as a bare "project:" line parses to None, and .get(key, {})
        # would hand that None straight to .get() below.
        project = cfg.get("project") or {}
        for key in ("title", "type"):
            if val := project.get(key):
                meta[key] = str(val)
        book = cfg.get("book") or {}
        for key in ("title", "author"):
            if val := book.get(key):
                meta[key] = str(val)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        # Default output directory for website projects is "_site".
        output_dir = project.get("output-dir", "_site")
        arts = AttrDict(
            render=StaticSite(
                proj=self.proj,
                cmd=["quarto", "render"],
                fn=f"{self.proj.url}/{output_dir}/index.html",
            ),
            preview=Server(
                proj=self.proj,
                cmd=["quarto", "preview"],
            ),
        )

        self._contents = conts
        self._artifacts = arts

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Quarto website project."""
        name = os.path.basename(path)
        with open(os.path.join(path, "_quarto.yml"), "wt") as f:
            f.write(
                "project:\n"
                "  type: website\n"
                "\n"
                "website:\n"
                f'  title: "{name}"\n'
                "  navbar:\n"
                "    left:\n"
                "      - href: index.qmd\n"
                "        text: Home\n"
                "\n"
                "format:\n"
                "  html:\n"
                "    theme: cosmo\n"
            )
        with open(os.path.join(path, "index.qmd"), "wt") as f:
            f.write(
                "---\n"
                f'title: "{name}"\n'
                "---\n"
                "\n"
                "Welcome to this Quarto project.\n"
            )
class Dagster(ProjectSpec):
    """Dagster data orchestration project.

    Matched via a ``[tool.dagster]`` table in pyproject.toml, or by a
    ``dagster.yaml``/``workspace.yaml`` file.  Exposes the ``dagster dev``
    webserver and a materialize-all-assets process.
    """

    spec_doc = "https://docs.dagster.io/api/python-api/workspace"

    def match(self) -> bool:
        """pyproject [tool.dagster] table, or dagster.yaml/workspace.yaml."""
        if self.proj.pyproject.get("tool", {}).get("dagster"):
            return True
        return bool(
            {"dagster.yaml", "workspace.yaml"}.intersection(self.proj.basenames)
        )

    def parse(self) -> None:
        """Record the configured module (if any) and the core dagster commands."""
        from projspec.artifact.process import Process, Server
        from projspec.content.executable import Command
        from projspec.content.metadata import DescriptiveMetadata

        meta: dict[str, str] = {}
        dagster_cfg = self.proj.pyproject.get("tool", {}).get("dagster", {})
        if isinstance(dagster_cfg, dict):
            if module := dagster_cfg.get("module_name"):
                meta["module"] = str(module)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        # Core commands. (Was named ``dbt_cmds`` — a copy/paste remnant from
        # the Dbt spec — and the same argv lists were then re-typed by hand
        # for the artifacts; one table now drives both so they cannot drift.)
        core_cmds = {
            "dev": ["dagster", "dev"],
            "materialize": ["dagster", "asset", "materialize", "--select", "*"],
        }
        cmds = AttrDict()
        arts = AttrDict()
        for name, cmd in core_cmds.items():
            cmds[name] = Command(proj=self.proj, cmd=cmd)

        # "dev" is a long-running webserver; "materialize" is a one-shot run.
        arts["dev"] = Server(proj=self.proj, cmd=core_cmds["dev"])
        arts["materialize"] = Process(proj=self.proj, cmd=core_cmds["materialize"])

        conts["command"] = cmds
        self._contents = conts
        self._artifacts = arts
class Airflow(ProjectSpec):
    """Apache Airflow workflow orchestration project/DAG spec.

    Matched by a ``dags/`` directory containing at least one ``.py`` file.
    Every DAG id found (via a ``dag_id=...`` keyword in the source) becomes a
    pipeline stage; standalone/scheduler/webserver commands are always added.
    """

    spec_doc = (
        "https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html"
    )

    def match(self) -> bool:
        """True when dags/ exists and contains at least one .py file."""
        dags_dir = f"{self.proj.url}/dags"
        try:
            if not self.proj.fs.isdir(dags_dir):
                return False
            entries = self.proj.fs.ls(dags_dir, detail=False)
            return any(e.endswith(".py") for e in entries)
        except Exception:
            # Some filesystem backends raise instead of returning False.
            return False

    def parse(self) -> None:
        """Build pipeline stages per DAG plus the standard airflow commands."""
        from projspec.artifact.process import Process, Server
        from projspec.content.cicd import PipelineStage
        from projspec.content.executable import Command

        dags_dir = f"{self.proj.url}/dags"
        try:
            entries = self.proj.fs.ls(dags_dir, detail=False)
        except Exception as exc:
            raise ParseFailed(f"Could not list dags/: {exc}") from exc

        stages = AttrDict()
        for entry in entries:
            if not entry.endswith(".py"):
                continue
            # removesuffix, not replace: "my.python_dag.py" must not lose the
            # inner ".py" substring.
            dag_name = os.path.basename(entry).removesuffix(".py")
            if dag_name.startswith("_"):
                continue
            # Try to extract explicit dag_id= values from the file content.
            try:
                with self.proj.fs.open(entry, "rt") as f:
                    content = f.read()
                dag_ids = re.findall(r'dag_id\s*=\s*["\']([^"\']+)["\']', content)
            except Exception:
                dag_ids = []
            # Fall back to the file stem both when the file is unreadable AND
            # when it declares no dag_id= kwarg (positional/implicit ids) —
            # previously such files silently produced no stage at all.
            for dag_id in dag_ids or [dag_name]:
                stages[dag_id] = PipelineStage(
                    proj=self.proj,
                    name=dag_id,
                    cmd=["airflow", "dags", "trigger", dag_id],
                )

        cmds = AttrDict(
            standalone=Command(proj=self.proj, cmd=["airflow", "standalone"]),
            scheduler=Command(proj=self.proj, cmd=["airflow", "scheduler"]),
            webserver=Command(proj=self.proj, cmd=["airflow", "webserver"]),
        )
        arts = AttrDict(
            standalone=Process(proj=self.proj, cmd=["airflow", "standalone"]),
            webserver=Server(
                proj=self.proj, cmd=["airflow", "webserver", "--port", "8080"]
            ),
        )

        conts = AttrDict(command=cmds)
        if stages:
            conts["pipeline_stage"] = stages

        self._contents = conts
        self._artifacts = arts
class Nox(ProjectSpec):
    """Nox Python automation project.

    Often used for testing, linting, and packaging. Nox is a Python
    environment management tool that allows you to define multiple
    CI runs in one execution.
    """

    spec_doc = "https://nox.thea.codes/en/stable/config.html"

    def match(self) -> bool:
        """A project is a Nox project iff it contains a noxfile.py."""
        return "noxfile.py" in self.proj.basenames

    def parse(self) -> None:
        """Expose one command/process per discovered nox session."""
        from projspec.artifact.process import Process
        from projspec.content.executable import Command

        # Best-effort session discovery: functions decorated with
        # @nox.session or @session inside noxfile.py.
        sessions: list[str] = []
        try:
            with self.proj.get_file("noxfile.py") as f:
                source = f.read()
            sessions = re.findall(
                r"@(?:nox\.)?session(?:\([^)]*\))?\s+def\s+(\w+)",
                source,
                re.MULTILINE,
            )
        except Exception:
            pass

        # One entry per session; a single bare "nox" run when none were found.
        if sessions:
            targets = {name: ["nox", "-s", name] for name in sessions}
        else:
            targets = {"nox": ["nox"]}

        commands = AttrDict()
        processes = AttrDict()
        for key, argv in targets.items():
            commands[key] = Command(proj=self.proj, cmd=argv)
            processes[key] = Process(proj=self.proj, cmd=argv)

        self._contents = AttrDict(command=commands)
        self._artifacts = AttrDict(process=processes)

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal noxfile.py with test and lint sessions."""
        noxfile = os.path.join(path, "noxfile.py")
        with open(noxfile, "wt") as f:
            f.write(
                "import nox\n"
                "\n"
                "\n"
                "@nox.session\n"
                "def tests(session):\n"
                '    """Run the test suite."""\n'
                "    session.install('pytest')\n"
                "    session.run('pytest')\n"
                "\n"
                "\n"
                "@nox.session\n"
                "def lint(session):\n"
                '    """Lint the code."""\n'
                "    session.install('ruff')\n"
                "    session.run('ruff', 'check', '.')\n"
            )
If a `@project(name=...)` + decorator is found, the project name is captured in metadata. If + `@schedule` or `@trigger` decorators are present, deployment commands + for Argo Workflows and AWS Step Functions are added alongside the local + `run` command. + + No explicit parsing of Config files, since they are designed as + defaults, and often overridden (and hard to detect). + """ + + spec_doc = "https://docs.metaflow.org" + + _IMPORT_RE = re.compile(r"from\s+metaflow\s+import|import\s+metaflow") + _FLOW_RE = re.compile(r"class\s+(\w+)\s*\(\s*\w*FlowSpec\s*\)") + _PROJECT_RE = re.compile(r'@project\s*\(\s*name\s*=\s*["\']([^"\']+)["\']') + _STEP_RE = re.compile(r"@step\s+def\s+(\w+)\s*\(") + _DEPLOY_RE = re.compile(r"@schedule|@trigger|@trigger_on_finish|@project") + + def match(self) -> bool: + for path, content in self.proj.scanned_files.items(): + if not path.endswith(".py"): + continue + try: + src = content.decode() + except Exception: + continue + if self._IMPORT_RE.search(src) and self._FLOW_RE.search(src): + return True + return False + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.cicd import PipelineStage + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + cmds = AttrDict() + arts = AttrDict() + stages = AttrDict() + project_names: list[str] = [] + + for full_path, content in self.proj.scanned_files.items(): + if not full_path.endswith(".py"): + continue + try: + src = content.decode() + except Exception: + continue + + if not (self._IMPORT_RE.search(src) and self._FLOW_RE.search(src)): + continue + + # Relative path for use in commands + rel = full_path.replace(self.proj.url, "").lstrip("/") + stem = os.path.basename(rel).replace(".py", "") + + # Flow class name and @project name + flow_match = self._FLOW_RE.search(src) + flow_class = flow_match.group(1) if flow_match else stem + + proj_match = self._PROJECT_RE.search(src) + if proj_match: 
+ project_names.append(proj_match.group(1)) + + # Step names → pipeline stages + step_names = self._STEP_RE.findall(src) + for step in step_names: + stage_key = f"{stem}.{step}" + stages[stage_key] = PipelineStage( + proj=self.proj, + name=step, + cmd=["python", rel, "run", f"--start", step], + ) + + # Local run command + run_cmd = ["python", rel, "run"] + cmds[stem] = Command(proj=self.proj, cmd=run_cmd) + arts[stem] = Process(proj=self.proj, cmd=run_cmd) + + # Deployment commands — only when scheduling/trigger decorators present + if self._DEPLOY_RE.search(src): + arts[f"{stem}.argo_create"] = Process( + proj=self.proj, + cmd=["python", rel, "argo-workflows", "create"], + ) + arts[f"{stem}.step_functions_create"] = Process( + proj=self.proj, + cmd=["python", rel, "step-functions", "create"], + ) + + if not cmds: + raise ParseFailed("No Metaflow flows found in scanned files") + + conts = AttrDict() + meta: dict[str, str] = {} + if project_names: + meta["project"] = ", ".join(sorted(set(project_names))) + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + if stages: + conts["pipeline_stage"] = stages + conts["command"] = cmds + + self._contents = conts + self._artifacts = AttrDict(process=arts) + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Metaflow project with a single HelloFlow.""" + name = os.path.basename(path).replace("-", "_").replace(" ", "_") + flow_name = "".join(part.title() for part in name.split("_")) + "Flow" + with open(os.path.join(path, "flow.py"), "wt") as f: + f.write( + "from metaflow import FlowSpec, step\n" + "\n" + "\n" + f"class {flow_name}(FlowSpec):\n" + f' """{flow_name} — generated by projspec."""\n' + "\n" + " @step\n" + " def start(self):\n" + ' """Entry point."""\n' + " print('Starting flow')\n" + " self.next(self.end)\n" + "\n" + " @step\n" + " def end(self):\n" + ' """Final step."""\n' + " print('Flow complete')\n" + "\n" + "\n" + "if __name__ == 
'__main__':\n" + f" {flow_name}()\n" + ) + + +class MLFlow(ProjectSpec): + """MLflow project, defined by an `MLproject` (or `MLFlow`) file. + + An MLproject file is a YAML document that declares the project name, + the environment (conda or pip), and one or more named entry points. + """ + + spec_doc = ( + "https://mlflow.org/docs/latest/ml/projects/#mlproject-file-configuration" + ) + + def match(self) -> bool: + return "MLproject" in self.proj.basenames or "MLFlow" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process + from projspec.content.environment import Environment, Precision, Stack + from projspec.content.executable import Command + + fname = "MLproject" if "MLproject" in self.proj.basenames else "MLFlow" + with self.proj.fs.open(self.proj.basenames[fname], "rt") as f: + meta = yaml.safe_load(f) + + if "python_env" in meta: + with self.proj.get_file(meta["python_env"], text=True) as f: + env = yaml.safe_load(f) + self._contents["environment"] = Environment( + stack=Stack.PIP, + precision=Precision.SPEC, + packages=env.get("dependencies", []) + + [f"python {env.get('python', '')}"], + proj=self.proj, + ) + elif "conda_env" in meta: + with self.proj.get_file(meta["conda_env"], text=True) as f: + env = yaml.safe_load(f) + self._contents["environment"] = Environment( + stack=Stack.CONDA, + precision=Precision.SPEC, + packages=env.get("dependencies", []), + channels=env.get("channels"), + proj=self.proj, + ) + + cmds = AttrDict() + arts = AttrDict() + for name, ep in meta.get("entry_points", {}).items(): + cmds[name] = Command(proj=self.proj, cmd=ep["command"]) + arts[name] = Process(proj=self.proj, cmd=["mlflow", "run", ".", "-e", name]) + + if cmds: + self._contents["command"] = cmds + if arts: + self._artifacts = AttrDict(process=arts) + if self._contents is None: + self._contents = AttrDict() + if self._artifacts is None: + self._artifacts = AttrDict() + + @staticmethod + def _create(path: str) -> None: + with 
open(f"{path}/MLproject", "w") as f: + f.write( + "name: tutorial\n" + "\n" + "conda_env: conda.yaml\n" + "\n" + "entry_points:\n" + " main:\n" + " parameters:\n" + " alpha: {type: float, default: 0.5}\n" + " l1_ratio: {type: float, default: 0.1}\n" + ' command: "python train.py {alpha} {l1_ratio}"\n' + ) + with open(f"{path}/conda.yaml", "w") as f: + f.write( + "name: ml-project\n" + "channels:\n" + " - conda-forge\n" + "dependencies:\n" + " - python=3.9\n" + ) + with open(f"{path}/train.py", "w") as f: + f.write("# MLFlow training script\n") diff --git a/src/projspec/proj/documentation.py b/src/projspec/proj/documentation.py index e6fdeb1..77d018b 100644 --- a/src/projspec/proj/documentation.py +++ b/src/projspec/proj/documentation.py @@ -2,6 +2,7 @@ import re import toml +import yaml from projspec.proj import ProjectSpec from projspec.proj.base import ParseFailed @@ -259,3 +260,233 @@ def _create(path: str) -> None: # docs/requirements.txt — build dependencies with open(f"{path}/docs/requirements.txt", "wt") as f: f.write("sphinx\n") + + +class MkDocs(ProjectSpec): + """MkDocs documentation project.""" + + spec_doc = "https://www.mkdocs.org/user-guide/configuration/" + + _NAMES = {"mkdocs.yml", "mkdocs.yaml"} + + def match(self) -> bool: + return bool(self._NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + from projspec.content.metadata import DescriptiveMetadata + + fname = next(n for n in self._NAMES if n in self.proj.basenames) + try: + with self.proj.get_file(fname) as f: + cfg = yaml.safe_load(f) + except Exception as exc: + raise ParseFailed(f"Could not read {fname}: {exc}") from exc + + cfg = cfg or {} + meta: dict[str, str] = {} + for key in ("site_name", "site_description", "site_author", "repo_url"): + if val := cfg.get(key): + meta[key] = str(val) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( 
class Sphinx(ProjectSpec):
    """Sphinx documentation project (standalone, without ReadTheDocs config)."""

    spec_doc = "https://www.sphinx-doc.org/en/master/usage/configuration.html"

    def match(self) -> bool:
        """conf.py anywhere among the known basenames, or at docs/conf.py."""
        if "conf.py" in self.proj.basenames:
            return True
        try:
            return self.proj.fs.isfile(f"{self.proj.url}/docs/conf.py")
        except Exception:
            return False

    def parse(self) -> None:
        """Read metadata out of conf.py and expose build/serve artifacts."""
        from projspec.artifact.infra import StaticSite
        from projspec.artifact.process import Server
        from projspec.content.metadata import DescriptiveMetadata

        # Locate conf.py and the directory sphinx-build should operate on.
        if "conf.py" in self.proj.basenames:
            conf_path = self.proj.basenames["conf.py"]
            docs_dir = self.proj.url
        else:
            docs_dir = f"{self.proj.url}/docs"
            conf_path = f"{docs_dir}/conf.py"

        # Pull simple `var = "value"` assignments out of conf.py; anything
        # computed at runtime in conf.py is invisible to this regex scan.
        conf_src = ""
        try:
            with self.proj.fs.open(conf_path, "rt") as f:
                conf_src = f.read()
        except Exception:
            pass
        meta: dict[str, str] = {}
        for var in ("project", "author", "release", "version"):
            m = re.search(
                rf'^{var}\s*=\s*["\']([^"\']+)["\']', conf_src, re.MULTILINE
            )
            if m:
                meta[var] = m.group(1)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        build_dir = f"{docs_dir}/_build/html"
        arts = AttrDict(
            docs=StaticSite(
                proj=self.proj,
                cmd=["sphinx-build", "-b", "html", docs_dir, build_dir],
                fn=f"{build_dir}/index.html",
            ),
            autobuild=Server(
                proj=self.proj,
                cmd=["sphinx-autobuild", docs_dir, build_dir],
            ),
        )

        self._contents = conts
        self._artifacts = arts

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Sphinx docs project."""
        name = os.path.basename(path)
        docs_dir = os.path.join(path, "docs")
        os.makedirs(docs_dir, exist_ok=True)

        conf_body = (
            f'project = "{name}"\n' "extensions = []\n" 'html_theme = "alabaster"\n'
        )
        with open(os.path.join(docs_dir, "conf.py"), "wt") as f:
            f.write(conf_body)
        with open(os.path.join(docs_dir, "index.rst"), "wt") as f:
            f.write(f"{name}\n{'=' * len(name)}\n\n.. toctree::\n   :maxdepth: 2\n")
        with open(os.path.join(docs_dir, "requirements.txt"), "wt") as f:
            f.write("sphinx\n")
class DockerCompose(ProjectSpec):
    """Docker Compose multi-service project.

    Designed to launch a set of runtimes (specific images with config), volumes
    and networks, and expose ports.
    """

    spec_doc = "https://docs.docker.com/reference/compose-file/"

    _NAMES = {
        "docker-compose.yml",
        "docker-compose.yaml",
        "compose.yml",
        "compose.yaml",
    }

    def match(self) -> bool:
        """Any of the four conventional compose filenames is present."""
        return bool(self._NAMES.intersection(self.proj.basenames))

    @staticmethod
    def _split_image(image: str) -> tuple[str, str]:
        """Split an image reference into (repository, tag).

        Partition on the LAST colon, and only treat the remainder as a tag
        when it contains no "/": this keeps registry ports intact, e.g.
        "reg.io:5000/app" has no tag, while "reg.io:5000/app:1.2" -> "1.2".
        (Digest references like "app@sha256:..." are treated as tagged by
        this heuristic — acceptable for display purposes.)
        """
        repo, sep, tag = image.rpartition(":")
        if not sep or "/" in tag:
            return image, ""
        return repo, tag

    def parse(self) -> None:
        """Turn each compose service into a ServiceDependency content item."""
        from projspec.artifact.infra import ComposeStack
        from projspec.content.cicd import ServiceDependency
        from projspec.content.metadata import DescriptiveMetadata

        fname = next(n for n in self._NAMES if n in self.proj.basenames)
        try:
            with self.proj.get_file(fname) as f:
                cfg = yaml.safe_load(f)
        except Exception as exc:
            raise ParseFailed(f"Could not read {fname}: {exc}") from exc

        if not isinstance(cfg, dict):
            raise ParseFailed(f"{fname} did not parse to a mapping")

        # ``or {}``: an explicit null "services:" key would otherwise crash
        # the .items() call below.
        services = cfg.get("services") or {}
        service_deps = AttrDict()
        for svc_name, svc_cfg in services.items():
            if not isinstance(svc_cfg, dict):
                continue
            image = svc_cfg.get("image", "")
            repo, tag = self._split_image(image)
            # Guess service type from the image's final path component
            # (previously split on the FIRST colon, which misidentified
            # registry-with-port references).
            svc_type = repo.rsplit("/", 1)[-1] if image else svc_name
            service_deps[svc_name] = ServiceDependency(
                proj=self.proj,
                name=svc_name,
                service_type=svc_type,
                version=tag,
                image=image,
            )

        conts = AttrDict()
        if service_deps:
            conts["service_dependency"] = service_deps

        meta: dict[str, str] = {}
        if "name" in cfg:
            meta["name"] = str(cfg["name"])
        if services:
            meta["services"] = ", ".join(services.keys())
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        self._contents = conts
        self._artifacts = AttrDict(stack=ComposeStack(proj=self.proj, file=fname))

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal docker-compose.yml."""
        with open(os.path.join(path, "docker-compose.yml"), "wt") as f:
            f.write(
                "services:\n"
                "  app:\n"
                "    image: alpine:latest\n"
                "    command: echo 'Hello from Docker Compose!'\n"
            )
Terraform(ProjectSpec): + """Terraform infrastructure-as-code project.""" + + spec_doc = "https://developer.hashicorp.com/terraform/language" + + def match(self) -> bool: + return any(n.endswith(".tf") for n in self.proj.basenames) + + def parse(self) -> None: + from projspec.artifact.infra import TerraformPlan + from projspec.artifact.process import Process + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + # Extract resource types from .tf files + resource_types: set[str] = set() + providers: set[str] = set() + for basename, full_path in self.proj.basenames.items(): + if not basename.endswith(".tf"): + continue + try: + with self.proj.fs.open(full_path, "rt") as f: + content = f.read() + resource_types.update( + re.findall(r'^resource\s+"([^"]+)"', content, re.MULTILINE) + ) + providers.update( + re.findall(r'source\s*=\s*"[^/]+/([^"]+)"', content, re.MULTILINE) + ) + except Exception: + pass + + conts = AttrDict() + meta: dict[str, str] = {} + if providers: + meta["providers"] = ", ".join(sorted(providers)) + if resource_types: + meta["resource_types"] = ", ".join(sorted(resource_types)) + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + tf_commands = { + "init": ["terraform", "init"], + "validate": ["terraform", "validate"], + "apply": ["terraform", "apply", "-auto-approve"], + "destroy": ["terraform", "destroy", "-auto-approve"], + "output": ["terraform", "output"], + } + cmds = AttrDict() + arts = AttrDict() + for name, cmd in tf_commands.items(): + cmds[name] = Command(proj=self.proj, cmd=cmd) + arts[name] = Process(proj=self.proj, cmd=cmd) + + arts["plan"] = TerraformPlan(proj=self.proj) + + conts["command"] = cmds + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Terraform project.""" + with open(os.path.join(path, "main.tf"), "wt") as f: + f.write( + "terraform {\n" + 
class Ansible(ProjectSpec):
    """Ansible automation project."""

    spec_doc = "https://docs.ansible.com/ansible/latest/reference_appendices/playbooks_keywords.html"

    _PLAYBOOK_NAMES = {"playbook.yml", "playbook.yaml", "site.yml", "site.yaml"}

    def match(self) -> bool:
        """ansible.cfg, a conventional playbook name, or roles/ + any YAML."""
        if "ansible.cfg" in self.proj.basenames:
            return True
        if self._PLAYBOOK_NAMES.intersection(self.proj.basenames):
            return True
        # roles/ directory alongside a YAML file.  Guard isdir with
        # try/except like the sibling specs (Airflow, Snakemake) do — some
        # fsspec backends raise instead of returning False.
        try:
            if self.proj.fs.isdir(f"{self.proj.url}/roles"):
                return any(n.endswith((".yml", ".yaml")) for n in self.proj.basenames)
        except Exception:
            pass
        return False

    def parse(self) -> None:
        """One command/process per candidate playbook file."""
        from projspec.artifact.process import Process
        from projspec.content.executable import Command

        # Candidate playbooks: every YAML file except known non-playbooks.
        # NOTE(review): this can still pick up unrelated YAML (CI configs
        # etc.); kept as-is to preserve existing matching behavior.
        playbook_files = [
            n
            for n in self.proj.basenames
            if n.endswith((".yml", ".yaml"))
            and n not in {"requirements.yml", "galaxy.yml"}
        ]

        cmds = AttrDict()
        arts = AttrDict()

        for pb in playbook_files:
            # splitext strips only the final extension; the previous
            # replace(".yml", "") also mangled names containing ".yml"
            # in the middle.
            name = os.path.splitext(pb)[0]
            cmd = ["ansible-playbook", pb]
            cmds[name] = Command(proj=self.proj, cmd=cmd)
            arts[name] = Process(proj=self.proj, cmd=cmd)

        if not cmds:
            # Fall back to the conventional entry playbook.
            cmds["run"] = Command(proj=self.proj, cmd=["ansible-playbook", "site.yml"])
            arts["run"] = Process(proj=self.proj, cmd=["ansible-playbook", "site.yml"])

        self._contents = AttrDict(command=cmds)
        self._artifacts = AttrDict(process=arts)

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal Ansible project (playbook + local inventory)."""
        with open(os.path.join(path, "playbook.yml"), "wt") as f:
            f.write(
                "---\n"
                "- name: Example playbook\n"
                "  hosts: localhost\n"
                "  gather_facts: false\n"
                "  tasks:\n"
                "    - name: Print hello\n"
                "      ansible.builtin.debug:\n"
                "        msg: 'Hello from Ansible!'\n"
            )
        with open(os.path.join(path, "inventory"), "wt") as f:
            f.write("localhost ansible_connection=local\n")
class CDK(ProjectSpec):
    """AWS Cloud Development Kit (CDK) project."""

    spec_doc = "https://docs.aws.amazon.com/cdk/v2/guide/projects.html"

    def match(self) -> bool:
        """A cdk.json file marks a CDK project."""
        return "cdk.json" in self.proj.basenames

    def parse(self) -> None:
        """Read cdk.json and expose the standard cdk lifecycle commands."""
        import json

        from projspec.artifact.deployment import Deployment
        from projspec.artifact.process import Process
        from projspec.content.executable import Command
        from projspec.content.metadata import DescriptiveMetadata

        try:
            with self.proj.get_file("cdk.json") as f:
                cfg = json.loads(f.read())
        except Exception as exc:
            raise ParseFailed(f"Could not read cdk.json: {exc}") from exc

        if not isinstance(cfg, dict):
            raise ParseFailed("cdk.json did not parse to a mapping")

        conts = AttrDict()
        # "app" holds the command CDK runs to synthesize the application.
        app_cmd = cfg.get("app", "")
        if app_cmd:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta={"app": app_cmd}
            )

        # One Command per commonly-used cdk subcommand.
        command_table = {
            "synth": ["cdk", "synth"],
            "deploy": ["cdk", "deploy", "--all"],
            "destroy": ["cdk", "destroy", "--all"],
            "diff": ["cdk", "diff"],
        }
        cmds = AttrDict()
        for key, argv in command_table.items():
            cmds[key] = Command(proj=self.proj, cmd=argv)

        arts = AttrDict(
            deploy=Deployment(
                proj=self.proj,
                cmd=["cdk", "deploy", "--all", "--require-approval", "never"],
                release="cdk",
                clean_cmd=["cdk", "destroy", "--all", "--force"],
            ),
            diff=Process(proj=self.proj, cmd=["cdk", "diff"]),
            synth=Process(proj=self.proj, cmd=["cdk", "synth"]),
        )

        conts["command"] = cmds
        self._contents = conts
        self._artifacts = arts

    @staticmethod
    def _create(path: str) -> None:
        """Scaffold a minimal cdk.json pointing at a TypeScript app."""
        cdk_json = os.path.join(path, "cdk.json")
        with open(cdk_json, "wt") as f:
            f.write('{\n  "app": "npx ts-node --prefer-ts-exts bin/app.ts"\n}\n')
from projspec.content.metadata import DescriptiveMetadata + from projspec.utils import PickleableTomlDecoder + + try: + with self.proj.get_file("nixpacks.toml", text=False) as f: + cfg = toml.loads(f.read().decode(), decoder=PickleableTomlDecoder()) + except Exception as exc: + raise ParseFailed(f"Could not read nixpacks.toml: {exc}") from exc + + meta: dict[str, str] = {} + phases = cfg.get("phases", {}) + if phases: + meta["phases"] = ", ".join(phases.keys()) + start = cfg.get("start", {}) + if start_cmd := start.get("cmd"): + meta["start_cmd"] = str(start_cmd) + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + name = os.path.basename(self.proj.url).lower() + arts = AttrDict( + build=Process( + proj=self.proj, cmd=["nixpacks", "build", ".", "--name", name] + ), + ) + + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal nixpacks.toml.""" + with open(os.path.join(path, "nixpacks.toml"), "wt") as f: + f.write( + "[phases.setup]\n" + "nixPkgs = ['python311']\n" + "\n" + "[phases.install]\n" + "cmds = ['pip install -r requirements.txt']\n" + "\n" + "[start]\n" + "cmd = 'python app.py'\n" + ) + + +class Vagrant(ProjectSpec): + """Vagrant virtual machine project.""" + + spec_doc = "https://developer.hashicorp.com/vagrant/docs/vagrantfile" + + def match(self) -> bool: + return "Vagrantfile" in self.proj.basenames + + def parse(self) -> None: + from projspec.artifact.process import Process, Server + from projspec.content.executable import Command + from projspec.content.metadata import DescriptiveMetadata + + # Extract box name from Vagrantfile via simple regex + meta: dict[str, str] = {} + try: + with self.proj.get_file("Vagrantfile") as f: + content = f.read() + boxes = re.findall(r'config\.vm\.box\s*=\s*["\']([^"\']+)["\']', content) + if boxes: + meta["box"] = boxes[0] + hostname_match = re.search( + 
r'config\.vm\.hostname\s*=\s*["\']([^"\']+)["\']', content + ) + if hostname_match: + meta["hostname"] = hostname_match.group(1) + except Exception: + pass + + conts = AttrDict() + if meta: + conts["descriptive_metadata"] = DescriptiveMetadata( + proj=self.proj, meta=meta + ) + + cmds = AttrDict( + up=Command(proj=self.proj, cmd=["vagrant", "up"]), + halt=Command(proj=self.proj, cmd=["vagrant", "halt"]), + destroy=Command(proj=self.proj, cmd=["vagrant", "destroy", "-f"]), + ssh=Command(proj=self.proj, cmd=["vagrant", "ssh"]), + ) + arts = AttrDict( + vm=Server(proj=self.proj, cmd=["vagrant", "up"]), + ) + + conts["command"] = cmds + self._contents = conts + self._artifacts = arts + + @staticmethod + def _create(path: str) -> None: + """Scaffold a minimal Vagrantfile.""" + with open(os.path.join(path, "Vagrantfile"), "wt") as f: + f.write( + 'Vagrant.configure("2") do |config|\n' + ' config.vm.box = "ubuntu/jammy64"\n' + ' config.vm.provider "virtualbox" do |vb|\n' + ' vb.memory = "1024"\n' + " end\n" + "end\n" + ) diff --git a/src/projspec/proj/jsframeworks.py b/src/projspec/proj/jsframeworks.py new file mode 100644 index 0000000..d7b6744 --- /dev/null +++ b/src/projspec/proj/jsframeworks.py @@ -0,0 +1,379 @@ +"""JavaScript/Node.js framework specs: Next.js, Nuxt.js, SvelteKit, Vite, Deno, Bun, pnpm.""" + +import os + +from projspec.proj.base import ParseFailed, ProjectSpec +from projspec.proj.node import Node +from projspec.utils import AttrDict, run_subprocess + + +class NextJS(Node): + """Next.js React framework project.""" + + spec_doc = "https://nextjs.org/docs/app/api-reference/config/next-config-js" + + _CONFIG_NAMES = { + "next.config.js", + "next.config.mjs", + "next.config.ts", + "next.config.cjs", + } + + def match(self) -> bool: + return bool(self._CONFIG_NAMES.intersection(self.proj.basenames)) + + def parse(self) -> None: + from projspec.artifact.base import FileArtifact + from projspec.artifact.process import Server + + super().parse0() + + pkg_mgr = 
class NextJS(Node):
    """Next.js React framework project.

    Matched by the presence of any ``next.config.*`` file.
    """

    spec_doc = "https://nextjs.org/docs/app/api-reference/config/next-config-js"

    _CONFIG_NAMES = {
        "next.config.js",
        "next.config.mjs",
        "next.config.ts",
        "next.config.cjs",
    }

    def match(self) -> bool:
        return bool(self._CONFIG_NAMES.intersection(self.proj.basenames))

    def parse(self) -> None:
        """Expose dev server, production build and production start artifacts."""
        from projspec.artifact.base import FileArtifact
        from projspec.artifact.process import Server

        super().parse0()

        pkg_mgr = self._pkg_manager()
        # Development server
        self._artifacts["dev"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "dev"],
        )
        # Production build; BUILD_ID is written by `next build` on success.
        self._artifacts["build"] = FileArtifact(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "build"],
            fn=f"{self.proj.url}/.next/BUILD_ID",
        )
        # Production start
        self._artifacts["start"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "start"],
        )

    def _pkg_manager(self) -> str:
        """Infer the package manager from the lock file present."""
        if "yarn.lock" in self.proj.basenames:
            return "yarn"
        if "pnpm-lock.yaml" in self.proj.basenames:
            return "pnpm"
        if "bun.lock" in self.proj.basenames or "bun.lockb" in self.proj.basenames:
            return "bun"
        return "npm"

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(
            ["npx", "create-next-app@latest", path, "--yes"],
            cwd=os.path.dirname(path) or ".",
            output=False,
        )


class NuxtJS(Node):
    """Nuxt.js Vue framework project.

    Matched by the presence of any ``nuxt.config.*`` file.
    """

    spec_doc = "https://nuxt.com/docs/api/nuxt-config"

    _CONFIG_NAMES = {"nuxt.config.js", "nuxt.config.ts", "nuxt.config.mjs"}

    def match(self) -> bool:
        return bool(self._CONFIG_NAMES.intersection(self.proj.basenames))

    def parse(self) -> None:
        """Expose dev server, build and static-generate artifacts."""
        from projspec.artifact.base import FileArtifact
        from projspec.artifact.process import Server

        super().parse0()

        pkg_mgr = self._pkg_manager()
        self._artifacts["dev"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "dev"],
        )
        self._artifacts["build"] = FileArtifact(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "build"],
            fn=f"{self.proj.url}/.nuxt/tsconfig.json",
        )
        self._artifacts["generate"] = FileArtifact(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "generate"],
            fn=f"{self.proj.url}/.output/public/index.html",
        )

    def _pkg_manager(self) -> str:
        """Infer the package manager from the lock file present.

        NOTE(review): unlike NextJS/SvelteKit/Vite, this does not detect
        bun lock files — confirm whether that omission is intentional.
        """
        if "yarn.lock" in self.proj.basenames:
            return "yarn"
        if "pnpm-lock.yaml" in self.proj.basenames:
            return "pnpm"
        return "npm"

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(
            ["npx", "nuxi@latest", "init", path],
            cwd=os.path.dirname(path) or ".",
            output=False,
        )


class SvelteKit(Node):
    """SvelteKit project.

    Matched by the presence of a ``svelte.config.*`` file.
    """

    spec_doc = "https://svelte.dev/docs/kit/configuration"

    _CONFIG_NAMES = {"svelte.config.js", "svelte.config.ts"}

    def match(self) -> bool:
        return bool(self._CONFIG_NAMES.intersection(self.proj.basenames))

    def parse(self) -> None:
        """Expose dev server, build and preview artifacts."""
        from projspec.artifact.base import FileArtifact
        from projspec.artifact.process import Server

        super().parse0()

        pkg_mgr = self._pkg_manager()
        self._artifacts["dev"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "dev"],
        )
        self._artifacts["build"] = FileArtifact(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "build"],
            fn=f"{self.proj.url}/.svelte-kit/output/client/index.html",
        )
        self._artifacts["preview"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "preview"],
        )

    def _pkg_manager(self) -> str:
        """Infer the package manager from the lock file present."""
        if "yarn.lock" in self.proj.basenames:
            return "yarn"
        if "pnpm-lock.yaml" in self.proj.basenames:
            return "pnpm"
        if "bun.lock" in self.proj.basenames or "bun.lockb" in self.proj.basenames:
            return "bun"
        return "npm"

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(
            ["npm", "create", "svelte@latest", path],
            cwd=os.path.dirname(path) or ".",
            output=False,
        )


class Vite(Node):
    """Vite-based project (any frontend framework using Vite as the build tool).

    Note: SvelteKit also has a svelte.config, so
    SvelteKit takes priority via its more-specific match.
    """

    spec_doc = "https://vitejs.dev/config/"

    _CONFIG_NAMES = {
        "vite.config.js",
        "vite.config.ts",
        "vite.config.mjs",
        "vite.config.cjs",
        "vite.config.mts",
    }

    def match(self) -> bool:
        return bool(self._CONFIG_NAMES.intersection(self.proj.basenames))

    def parse(self) -> None:
        """Expose dev server, static build and preview artifacts."""
        from projspec.artifact.infra import StaticSite
        from projspec.artifact.process import Server

        super().parse0()

        pkg_mgr = self._pkg_manager()
        self._artifacts["dev"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "dev"],
        )
        # Vite's default build output directory is dist/.
        self._artifacts["build"] = StaticSite(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "build"],
            fn=f"{self.proj.url}/dist/index.html",
        )
        self._artifacts["preview"] = Server(
            proj=self.proj,
            cmd=[pkg_mgr, "run", "preview"],
        )

    def _pkg_manager(self) -> str:
        """Infer the package manager from the lock file present."""
        if "yarn.lock" in self.proj.basenames:
            return "yarn"
        if "pnpm-lock.yaml" in self.proj.basenames:
            return "pnpm"
        if "bun.lock" in self.proj.basenames or "bun.lockb" in self.proj.basenames:
            return "bun"
        return "npm"

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(
            [
                "npm",
                "create",
                "vite@latest",
                path,
                "--",
                "--template",
                "vanilla",
            ],
            cwd=os.path.dirname(path) or ".",
            output=False,
        )


class Pnpm(Node):
    """Node project managed with pnpm.

    Matched by the presence of a ``pnpm-lock.yaml`` file.
    """

    spec_doc = "https://pnpm.io/package_json"

    def match(self) -> bool:
        return "pnpm-lock.yaml" in self.proj.basenames

    def parse(self) -> None:
        """Expose the lock file artifact and, if parseable, the locked packages."""
        from projspec.content.environment import Environment, Stack, Precision
        from projspec.artifact.python_env import LockFile

        super().parse0()

        # Best-effort read of the lock file; any failure just skips the env.
        try:
            with self.proj.fs.open(f"{self.proj.url}/pnpm-lock.yaml", "rt") as f:
                import yaml as _yaml

                lock = _yaml.safe_load(f)
        except Exception:
            lock = {}

        self._artifacts["lock_file"] = LockFile(
            proj=self.proj,
            cmd=["pnpm", "install"],
            fn=self.proj.basenames["pnpm-lock.yaml"],
        )

        if isinstance(lock, dict):
            # Fix: guard against an explicit `packages: null` in the lock file,
            # which would previously raise AttributeError on .keys().
            pkgs = list((lock.get("packages") or {}).keys())
            if pkgs:
                self._contents.setdefault("environment", AttrDict())[
                    "pnpm_lock"
                ] = Environment(
                    proj=self.proj,
                    stack=Stack.NPM,
                    packages=pkgs,
                    precision=Precision.LOCK,
                )

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(["pnpm", "init"], cwd=path, output=False)


class Bun(Node):
    """Node project managed with Bun.

    Matched by either the text (``bun.lock``) or binary (``bun.lockb``) lock file.
    """

    spec_doc = "https://bun.sh/docs/install/lockfile"

    def match(self) -> bool:
        return bool({"bun.lock", "bun.lockb"}.intersection(self.proj.basenames))

    def parse(self) -> None:
        """Expose the Bun lock file artifact."""
        from projspec.artifact.python_env import LockFile

        super().parse0()

        # Prefer the newer text lock file when both are present.
        lock_name = "bun.lock" if "bun.lock" in self.proj.basenames else "bun.lockb"
        self._artifacts["lock_file"] = LockFile(
            proj=self.proj,
            cmd=["bun", "install"],
            fn=self.proj.basenames[lock_name],
        )

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(["bun", "init", "-y"], cwd=path, output=False)


class Deno(ProjectSpec):
    """Deno project.

    Note: this is a separate runtime, not a Node project.
    """

    spec_doc = "https://docs.deno.com/runtime/fundamentals/configuration/"

    _CONFIG_NAMES = {"deno.json", "deno.jsonc"}

    def match(self) -> bool:
        return bool(self._CONFIG_NAMES.intersection(self.proj.basenames))

    def parse(self) -> None:
        """Read deno.json(c) and expose tasks, lock file and main entry point.

        Raises
        ------
        ParseFailed
            If the config file cannot be read or does not parse to a mapping.
        """
        import json

        # Fix: FileArtifact and Server were imported here but never used.
        from projspec.artifact.process import Process
        from projspec.artifact.python_env import LockFile
        from projspec.content.executable import Command
        from projspec.content.metadata import DescriptiveMetadata

        fname = next(n for n in self._CONFIG_NAMES if n in self.proj.basenames)
        try:
            with self.proj.get_file(fname) as f:
                cfg = json.loads(f.read())
        except Exception as exc:
            raise ParseFailed(f"Could not read {fname}: {exc}") from exc

        if not isinstance(cfg, dict):
            raise ParseFailed(f"{fname} did not parse to a mapping")

        meta: dict[str, str] = {}
        for key in ("name", "version", "description"):
            if val := cfg.get(key):
                meta[key] = str(val)

        conts = AttrDict()
        if meta:
            conts["descriptive_metadata"] = DescriptiveMetadata(
                proj=self.proj, meta=meta
            )

        arts = AttrDict()
        tasks = cfg.get("tasks", {})
        cmds = AttrDict()
        for task_name, task_cmd in tasks.items():
            cmd_list = ["deno", "task", task_name]
            cmds[task_name] = Command(proj=self.proj, cmd=cmd_list)
            arts[task_name] = Process(proj=self.proj, cmd=cmd_list)

        if cmds:
            conts["command"] = cmds

        # Lock file
        if "deno.lock" in self.proj.basenames:
            # NOTE(review): hard-codes mod.ts as the cache root even when a
            # different entry point is configured — confirm intent.
            arts["lock_file"] = LockFile(
                proj=self.proj,
                cmd=["deno", "cache", "--reload", "mod.ts"],
                fn=self.proj.basenames["deno.lock"],
            )

        # Main entry point
        main = cfg.get("main") or cfg.get("exports")
        if main and isinstance(main, str):
            arts["run"] = Process(
                proj=self.proj,
                cmd=["deno", "run", "--allow-all", main],
            )

        self._contents = conts
        self._artifacts = arts

    @staticmethod
    def _create(path: str) -> None:
        run_subprocess(
            ["deno", "init", path],
            cwd=os.path.dirname(path) or ".",
            output=False,
        )
+    server_args = {
+        "port_arg": "--server.port",
+        "address_arg": "--server.address",
+    }
class Gradio(ProjectSpec):
    """Gradio machine learning demo and web app.

    Detected by scanning Python files for ``import gradio`` or ``gr.Interface`` / ``gr.Blocks``.
    """

    spec_doc = "https://www.gradio.app/docs/gradio/interface"
    server_args = {"port_arg": "--server-port", "address_arg": "--server-name"}

    def match(self) -> bool:
        # Cheap check only; parse() does the real content scan.
        return (
            any(fn.endswith(".py") for fn in self.proj.scanned_files)
            or "app.py" in self.proj.basenames
        )

    def parse(self) -> None:
        """Scan .py files for Gradio apps; raise ParseFailed when none found."""
        from projspec.artifact.process import Server

        found = {}
        for fpath, raw in self.proj.scanned_files.items():
            if not fpath.endswith(".py"):
                continue
            text = raw.decode()
            uses_gradio = "import gradio" in text or "from gradio" in text
            builds_app = any(
                marker in text
                for marker in ("gr.Interface(", "gr.Blocks(", "gradio.Interface(")
            )
            if not (uses_gradio and builds_app):
                continue
            app_name = fpath.rsplit("/", 1)[-1].replace(".py", "")
            found[app_name] = Server(
                proj=self.proj,
                cmd=["python", fpath],
                **self.server_args,
            )

        if not found:
            raise ParseFailed

        self._contents = AttrDict()
        self._artifacts = AttrDict(server=found)

    @staticmethod
    def _create(path: str) -> None:
        # https://www.gradio.app/guides/quickstart
        with open(f"{path}/app.py", "wt") as f:
            f.write(
                """import gradio as gr

def greet(name):
    return f"Hello, {name}!"

demo = gr.Interface(fn=greet, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
"""
            )


class Shiny(ProjectSpec):
    """Shiny for Python web application.

    Detected by scanning Python files for ``from shiny import`` combined with
    ``app = App(`` or ``@app.`` decorator usage. Also detects ``app.py`` at root.
    """

    spec_doc = "https://shiny.posit.co/py/docs/overview.html"
    server_args = {"port_arg": "--port", "address_arg": "--host"}

    def match(self) -> bool:
        # Cheap check only; parse() does the real content scan.
        return (
            any(fn.endswith(".py") for fn in self.proj.scanned_files)
            or "app.py" in self.proj.basenames
        )

    def parse(self) -> None:
        """Scan .py files for Shiny apps; raise ParseFailed when none found.

        NOTE(review): the "App(" / "@app." substring heuristics are loose and
        may match non-Shiny code — acceptable given the import check, but worth
        confirming against real-world projects.
        """
        from projspec.artifact.process import Server

        found = {}
        for fpath, raw in self.proj.scanned_files.items():
            if not fpath.endswith(".py"):
                continue
            text = raw.decode()
            uses_shiny = "from shiny" in text or "import shiny" in text
            builds_app = "App(" in text or "@app." in text or "app_ui" in text
            if not (uses_shiny and builds_app):
                continue
            app_name = fpath.rsplit("/", 1)[-1].replace(".py", "")
            found[app_name] = Server(
                proj=self.proj,
                cmd=["shiny", "run", fpath],
                **self.server_args,
            )

        if not found:
            raise ParseFailed

        self._contents = AttrDict()
        self._artifacts = AttrDict(server=found)

    @staticmethod
    def _create(path: str) -> None:
        # https://shiny.posit.co/py/docs/overview.html
        with open(f"{path}/app.py", "wt") as f:
            f.write(
                """from shiny import App, render, ui

app_ui = ui.page_fluid(
    ui.h2("Hello, Shiny!"),
    ui.input_text("name", "Enter your name:", value="World"),
    ui.output_text_verbatim("greeting"),
)

def server(input, output, session):
    @render.text
    def greeting():
        return f"Hello, {input.name()}!"

app = App(app_ui, server)
"""
            )
l1_ratio: {type: float, default: 0.1} - command: "python train.py {alpha} {l1_ratio}" -""" - ) - with open(f"{path}/conda.yaml", "w") as f: - f.write( - """ -name: ml-project -channels: - - conda-forge -dependencies: - - python=3.9 -""" - ) - with open(f"{path}/train.py", "w") as f: - f.write( - """ -# MLFlow code -""" - ) - - -# TODO: prefect https://docs.prefect.io/v3/how-to-guides/configuration/ -# manage-settings#configure-settings-for-a-project - -# TODO: apache airflow? (is complex!) - -# TODO: dbt https://docs.getdbt.com/reference/dbt_project.yml diff --git a/src/projspec/tools.py b/src/projspec/tools.py index 269d21e..6d8ff63 100644 --- a/src/projspec/tools.py +++ b/src/projspec/tools.py @@ -1,5 +1,12 @@ +from __future__ import annotations + +import platform +import subprocess +import sys from dataclasses import dataclass, field +from projspec.utils import is_installed + @dataclass class ToolInfo: @@ -24,9 +31,6 @@ class ToolInfo: TOOLS: dict[str, ToolInfo] = { t.name: t for t in [ - # ------------------------------------------------------------------ - # Python ecosystem - # ------------------------------------------------------------------ ToolInfo( name="uv", description="Extremely fast Python package and project manager (pip/venv/build replacement).", @@ -45,7 +49,6 @@ class ToolInfo: "uv python install 3.12", "conda install python=3.12", "brew install python", - "https://www.python.org/downloads/", "winget install --id=Python.Python.3", ], ), @@ -70,17 +73,17 @@ class ToolInfo: "pipx install pre-commit", ], ), - # ------------------------------------------------------------------ - # Conda ecosystem - # ------------------------------------------------------------------ ToolInfo( name="conda", description="Cross-platform package and environment manager (Anaconda/Miniconda/Miniforge).", install_suggestions=[ - "https://github.com/conda-forge/miniforge#install", - "https://docs.conda.io/en/latest/miniconda.html", "brew install --cask miniforge", "winget 
install --id=Anaconda.Miniconda3", + ( + "mkdir -p ~/miniconda3 && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh " + "&& bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 && rm -rf ~/miniconda3/miniconda.sh " + "&& ~/miniconda3/bin/conda init bash" + ), ], ), ToolInfo( @@ -98,7 +101,6 @@ class ToolInfo: "conda install -c conda-forge rattler-build", "cargo install rattler-build", "brew install rattler-build", - "https://github.com/prefix-dev/rattler-build/releases", ], ), ToolInfo( @@ -111,22 +113,15 @@ class ToolInfo: "conda install -c conda-forge pixi", ], ), - # ------------------------------------------------------------------ - # Containers - # ------------------------------------------------------------------ ToolInfo( name="docker", description="Container platform for building, shipping, and running applications.", install_suggestions=[ - "https://www.docker.com/products/docker-desktop/", "brew install --cask docker", "sudo apt-get install docker-ce docker-ce-cli containerd.io", "sudo dnf install docker-ce docker-ce-cli containerd.io", ], ), - # ------------------------------------------------------------------ - # Node / JavaScript ecosystem - # ------------------------------------------------------------------ ToolInfo( name="node", description="JavaScript runtime environment (Node.js).", @@ -134,7 +129,6 @@ class ToolInfo: "curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash && nvm install --lts", "conda install -c conda-forge nodejs", "brew install node", - "https://nodejs.org/en/download/", "winget install --id=OpenJS.NodeJS", ], ), @@ -142,8 +136,8 @@ class ToolInfo: name="npm", description="Default package manager bundled with Node.js.", install_suggestions=[ - "https://nodejs.org/en/download/", "nvm install --lts", + "brew install node", "conda install -c conda-forge nodejs", ], ), @@ -154,7 +148,6 @@ class ToolInfo: "npm install -g yarn", "brew install yarn", 
"conda install -c conda-forge yarn", - "https://yarnpkg.com/getting-started/install", ], ), ToolInfo( @@ -175,9 +168,6 @@ class ToolInfo: "brew install copier", ], ), - # ------------------------------------------------------------------ - # Rust ecosystem - # ------------------------------------------------------------------ ToolInfo( name="cargo", description="Rust package manager and build tool.", @@ -198,9 +188,6 @@ class ToolInfo: "cargo install maturin", ], ), - # ------------------------------------------------------------------ - # Version control - # ------------------------------------------------------------------ ToolInfo( name="git", description="Distributed version control system.", @@ -210,12 +197,8 @@ class ToolInfo: "sudo dnf install git", "conda install -c conda-forge git", "winget install --id=Git.Git", - "https://git-scm.com/downloads", ], ), - # ------------------------------------------------------------------ - # Web frameworks / app runners - # ------------------------------------------------------------------ ToolInfo( name="streamlit", description="Framework for turning Python scripts into shareable web apps.", @@ -280,9 +263,6 @@ class ToolInfo: "conda install -c conda-forge briefcase", ], ), - # ------------------------------------------------------------------ - # MLOps - # ------------------------------------------------------------------ ToolInfo( name="mlflow", description="Open-source platform for managing the ML lifecycle.", @@ -292,6 +272,258 @@ class ToolInfo: "uv add mlflow", ], ), + ToolInfo( + name="task", + description="Task runner / build tool using Taskfile.yml (go-task).", + install_suggestions=[ + "brew install go-task", + "conda install -c conda-forge go-task", + 'sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin', + "winget install --id=Task.Task", + ], + ), + ToolInfo( + name="just", + description="Command runner for project-specific scripts defined in a justfile.", + 
install_suggestions=[ + "brew install just", + "conda install -c conda-forge just", + "cargo install just", + "winget install --id=Casey.Just", + ], + ), + ToolInfo( + name="tox", + description="Generic Python test automation and virtualenv management tool.", + install_suggestions=[ + "pip install tox", + "pipx install tox", + "conda install -c conda-forge tox", + "uv tool install tox", + ], + ), + ToolInfo( + name="nox", + description="Flexible Python test automation, similar to tox but using plain Python.", + install_suggestions=[ + "pip install nox", + "pipx install nox", + "conda install -c conda-forge nox", + "uv tool install nox", + ], + ), + ToolInfo( + name="dbt", + description="Data transformation tool that runs SQL models against a data warehouse.", + install_suggestions=[ + "pip install dbt-core", + "uv add dbt-core", + "conda install -c conda-forge dbt-core", + ], + ), + ToolInfo( + name="quarto", + description="Open-source scientific and technical publishing system.", + install_suggestions=[ + "brew install --cask quarto", + "conda install -c conda-forge quarto", + "winget install --id=Posit.Quarto", + ], + ), + ToolInfo( + name="prefect", + description="Workflow orchestration platform for data and ML pipelines.", + install_suggestions=[ + "pip install prefect", + "uv add prefect", + "conda install -c conda-forge prefect", + ], + ), + ToolInfo( + name="dagster", + description="Cloud-native data orchestration platform for data pipelines.", + install_suggestions=[ + "pip install dagster dagster-webserver", + "uv add dagster dagster-webserver", + "conda install -c conda-forge dagster", + ], + ), + ToolInfo( + name="kedro", + description="Framework for creating reproducible, maintainable data science pipelines.", + install_suggestions=[ + "pip install kedro", + "uv add kedro", + "conda install -c conda-forge kedro", + "pipx install kedro", + ], + ), + ToolInfo( + name="airflow", + description="Platform for programmatically authoring, scheduling, and 
monitoring workflows.", + install_suggestions=[ + "pip install apache-airflow", + "uv add apache-airflow", + "conda install -c conda-forge apache-airflow", + ], + ), + ToolInfo( + name="snakemake", + description="Workflow management system for reproducible and scalable data analyses.", + install_suggestions=[ + "pip install snakemake", + "conda install -c conda-forge -c bioconda snakemake", + "uv add snakemake", + "mamba install -c conda-forge -c bioconda snakemake", + ], + ), + ToolInfo( + name="mkdocs", + description="Static site generator for project documentation, written in Python.", + install_suggestions=[ + "pip install mkdocs", + "uv add mkdocs", + "conda install -c conda-forge mkdocs", + "pipx install mkdocs", + ], + ), + ToolInfo( + name="sphinx-build", + description="Sphinx documentation builder (invoked as sphinx-build).", + install_suggestions=[ + "pip install sphinx", + "uv add sphinx", + "conda install -c conda-forge sphinx", + "pipx install sphinx", + ], + ), + ToolInfo( + name="sphinx-autobuild", + description="Live-reloading Sphinx documentation server.", + install_suggestions=[ + "pip install sphinx-autobuild", + "uv add sphinx-autobuild", + "conda install -c conda-forge sphinx-autobuild", + ], + ), + ToolInfo( + name="mdbook", + description="Utility to create modern online books from Markdown files (used by the Rust project).", + install_suggestions=[ + "cargo install mdbook", + "brew install mdbook", + "conda install -c conda-forge mdbook", + ], + ), + ToolInfo( + name="terraform", + description="Infrastructure as Code tool by HashiCorp for provisioning cloud resources.", + install_suggestions=[ + "brew install terraform", + "conda install -c conda-forge terraform", + "winget install --id=Hashicorp.Terraform", + ], + ), + ToolInfo( + name="ansible-playbook", + description="Ansible playbook runner for automating configuration and deployment.", + install_suggestions=[ + "pip install ansible", + "uv add ansible", + "conda install -c conda-forge 
ansible", + "brew install ansible", + "pipx install ansible", + ], + ), + ToolInfo( + name="pulumi", + description="Infrastructure as Code platform supporting multiple languages.", + install_suggestions=[ + "curl -fsSL https://get.pulumi.com | sh", + "brew install pulumi/tap/pulumi", + "conda install -c conda-forge pulumi", + "winget install --id=Pulumi.Pulumi", + ], + ), + ToolInfo( + name="cdk", + description="AWS Cloud Development Kit CLI for defining cloud infrastructure in code.", + install_suggestions=[ + "npm install -g aws-cdk", + "npx aws-cdk@latest", + ], + ), + ToolInfo( + name="earthly", + description="Build automation tool combining Makefile and Dockerfile syntax.", + install_suggestions=[ + "brew install earthly/earthly/earthly", + "sudo /bin/sh -c 'wget https://github.com/earthly/earthly/releases/latest/download/earthly-linux-amd64 -O /usr/local/bin/earthly && chmod +x /usr/local/bin/earthly'", + "winget install --id=Earthly.Earthly", + ], + ), + ToolInfo( + name="nixpacks", + description="Build app source code into OCI images using Nix, without a Dockerfile.", + install_suggestions=[ + "curl -sSL https://nixpacks.com/install.sh | bash", + "brew install railwayapp/tap/nixpacks", + ], + ), + ToolInfo( + name="vagrant", + description="Tool for building and managing portable virtual machine environments.", + install_suggestions=[ + "brew install --cask vagrant", + "winget install --id=Hashicorp.Vagrant", + "conda install -c conda-forge vagrant", + ], + ), + ToolInfo( + name="pnpm", + description="Fast, disk-efficient Node.js package manager.", + install_suggestions=[ + "npm install -g pnpm", + "brew install pnpm", + "winget install --id=pnpm.pnpm", + ], + ), + ToolInfo( + name="bun", + description="Fast all-in-one JavaScript runtime, bundler, and package manager.", + install_suggestions=[ + "curl -fsSL https://bun.sh/install | bash", + "brew install oven-sh/bun/bun", + "winget install --id=Oven-sh.Bun", + ], + ), + ToolInfo( + name="deno", + 
description="Secure JavaScript/TypeScript runtime built on V8.", + install_suggestions=[ + "curl -fsSL https://deno.land/install.sh | sh", + "brew install deno", + "conda install -c conda-forge deno", + "winget install --id=DenoLand.Deno", + ], + ), + ToolInfo( + name="npx", + description="Node.js package runner bundled with npm; executes packages without installing.", + install_suggestions=[ + "nvm install --lts", + "conda install -c conda-forge nodejs", + ], + ), + ToolInfo( + name="shiny", + description="Shiny for Python — build interactive web apps from Python scripts.", + install_suggestions=[ + "pip install shiny", + "uv add shiny", + "conda install -c conda-forge shiny", + ], + ), ] } @@ -302,7 +534,7 @@ def suggest(tool_name: str) -> str: Parameters ---------- tool_name: - The executable name as it appears in ``TOOLS`` (e.g. ``"uv"``). + The executable name as it appears in `TOOLS` (e.g. `"uv"`). Returns ------- @@ -332,3 +564,181 @@ def suggest(tool_name: str) -> str: for command in info.install_suggestions: lines.append(f" {command}") return "\n".join(lines) + + +def _is_url(s: str) -> bool: + return s.startswith(("https://", "http://")) + + +def _is_shell_string(s: str) -> bool: + """True when *s* requires a POSIX shell (contains a pipe, redirect, etc.).""" + return any(ch in s for ch in ("|", ">", "<", "&&", ";")) + + +def _leading_executable(s: str) -> str: + """Return the first word of an install string (the executable to invoke).""" + return s.split()[0] if s.split() else "" + + +# Platform names returned by sys.platform +_WINDOWS_PLATFORMS = {"win32", "cygwin", "msys"} +_IS_POSIX = sys.platform not in _WINDOWS_PLATFORMS +_CURRENT_PLATFORM = sys.platform if _IS_POSIX else "windows" + + +def _method_is_viable(install_string: str) -> bool: + """Return True when *install_string* can in principle be run on this machine. + + Rules: + - URL strings are never directly executable. + - `winget` strings are only viable on Windows. 
+ - Shell one-liners (containing `|`, `>`, etc.) are only viable on POSIX. + - `brew` is only viable when Homebrew is present (i.e. on macOS/Linux with + Homebrew installed). + - For everything else: viable if the leading executable exists on PATH. + """ + if _is_url(install_string): + return False + + # winget is Windows-only + if _leading_executable(install_string) == "winget": + return not _IS_POSIX + + # Shell one-liners require a POSIX shell + if _is_shell_string(install_string): + return _IS_POSIX and _leading_executable(install_string) in is_installed + + exe = _leading_executable(install_string) + return exe in is_installed + + +def _preferred_install_methods() -> list[str]: + """Return the user-configured ordered preference list of installer names. + + Reads `preferred_install_methods` from projspec config (a list of + installer executable names, e.g. `["uv", "conda", "pip"]`). Falls back + to a sensible platform-appropriate default ordering when not configured. + """ + from projspec.config import get_conf + + user = get_conf("preferred_install_methods") + + # Sensible first line install options + defaults: list[str] = [ + "uv", + "conda", + "mamba", + "pip", + "pip3", + "pipx", + "cargo", + "npm", + "npx", + ] + if not _IS_POSIX: + defaults.append("winget") + # shell one-liners come last among executable methods + defaults += ["brew", "curl", "sh", "bash", "sudo"] + + actual = user + [_ for _ in defaults if _ not in user] + return actual + + +def _rank_install_string(s: str, preference_order: list[str]) -> tuple[int, int]: + """Return a (preference_rank, original_index) sort key for *s*. + + Lower is better. Strings whose leading executable appears earlier in + *preference_order* sort first. URLs and shell strings without a + recognisable leading executable go to the end. 
+ """ + exe = _leading_executable(s) + try: + rank = preference_order.index(exe) + except ValueError: + rank = len(preference_order) + return rank + + +def choose_install_method(tool_name: str) -> str | None: + """Pick the best viable install method for *tool_name* on this machine. + + Selection algorithm + ------------------- + 1. Look up *tool_name* in :data:`TOOLS`. If not found, return `None`. + 2. Filter `install_suggestions` to those that are *viable* on the current + platform (see :func:`_method_is_viable`). + 3. Among the viable candidates, rank them by the ordered preference list + obtained from :func:`_preferred_install_methods` (which reads + `preferred_install_methods` from the projspec config, falling back to + a sensible platform default). + 4. Return the best-ranked candidate, or `None` if no viable candidate + exists. + + Parameters + ---------- + tool_name: + The executable name as it appears in :data:`TOOLS` (e.g. `"uv"`). + + Returns + ------- + str or None + The chosen install string (e.g. `"pip install uv"`), or `None` + when the tool is unknown or no viable install method was found. + """ + info = TOOLS.get(tool_name) + if info is None: + return None + + preference = _preferred_install_methods() + viable = [s for s in info.install_suggestions if _method_is_viable(s)] + if not viable: + return None + + return min(viable, key=lambda s: _rank_install_string(s, preference)) + + +def install_tool(tool_name: str) -> int: + """Install *tool_name* using the best available method for this machine. + + Selects an install command via :func:`choose_install_method`, then + executes it. Shell-style strings (those containing `|`, `>`, etc.) + are run with `subprocess.call(..., shell=True)`; regular space-separated + commands are split and passed as a list. + + Parameters + ---------- + tool_name: + The executable name as it appears in :data:`TOOLS` (e.g. `"uv"`). + + Returns + ------- + int + The exit code of the install command (0 = success). 
+ + Raises + ------ + ValueError + If *tool_name* is not found in :data:`TOOLS`. + RuntimeError + If no viable install method exists for the current platform. + """ + info = TOOLS.get(tool_name) + if info is None: + raise ValueError( + f"Unknown tool {tool_name!r}. " f"Known tools: {', '.join(sorted(TOOLS))}" + ) + + method = choose_install_method(tool_name) + if method is None: + raise RuntimeError( + f"No viable install method found for {tool_name!r} " + f"on {_CURRENT_PLATFORM!r}. " + f"Available suggestions:\n" + + "\n".join(f" {s}" for s in info.install_suggestions) + ) + + if _is_shell_string(method): + # Shell one-liners must run in a POSIX shell + return subprocess.call(method, shell=True) + else: + return subprocess.call(method.split()) diff --git a/src/projspec/utils.py b/src/projspec/utils.py index 0fcea37..3917576 100644 --- a/src/projspec/utils.py +++ b/src/projspec/utils.py @@ -176,10 +176,11 @@ def __init__(self): self.env = _linked_local_path(sys.executable) def exists(self, cmd: str, refresh=False): - """Test if command can be called, by starting a subprocess + """Test if command can be called by starting a subprocess - This is more costly what some PATH lookup (i.e., what ``which()`` does), but also - more rigorous. + This is more costly what some PATH lookup (i.e., what `which()` does), but also + more rigorous. We cache the result - currently for the session, and + eventually persistently. 
""" if refresh or (self.env, cmd) not in self.cache: try: diff --git a/tests/test_data_project.py b/tests/test_data_project.py index 9f71ff0..3dae345 100644 --- a/tests/test_data_project.py +++ b/tests/test_data_project.py @@ -8,21 +8,11 @@ from projspec.utils import from_dict -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - def _data_project(tmp_path): """Return a projspec.Project rooted at *tmp_path* (no walk needed).""" return projspec.Project(str(tmp_path)) -# --------------------------------------------------------------------------- -# Detection tests -# --------------------------------------------------------------------------- - - class TestDataDetection: def test_csv_detected(self, tmp_path): (tmp_path / "data.csv").write_text("x,y\n1,2\n3,4\n") @@ -45,11 +35,6 @@ def test_no_data_files_not_detected(self, tmp_path): assert "data" not in proj.specs -# --------------------------------------------------------------------------- -# Parse / DataResource field tests -# --------------------------------------------------------------------------- - - class TestDataParse: def test_single_csv_resource(self, tmp_path): (tmp_path / "sales.csv").write_text("col1,col2\n1,a\n2,b\n") @@ -98,11 +83,6 @@ def test_total_size_nonzero(self, tmp_path): assert dr.total_size > 0 -# --------------------------------------------------------------------------- -# Serialisation: to_dict -# --------------------------------------------------------------------------- - - class TestDataResourceToDict: def _make_dr(self, tmp_path): (tmp_path / "items.csv").write_text("id,val\n1,a\n2,b\n") @@ -121,11 +101,6 @@ def test_compact_omits_html(self, tmp_path): assert "_html" not in d -# --------------------------------------------------------------------------- -# Serialisation: from_dict round-trip -# 
--------------------------------------------------------------------------- - - class TestDataResourceRoundTrip: def _roundtrip(self, dr): """Serialise to JSON and rehydrate, returning the new DataResource.""" @@ -211,23 +186,14 @@ def test_roundtrip_html_survives_missing_sample_path(self, tmp_path): assert dr2._repr_html_() == html_original -# --------------------------------------------------------------------------- -# Conditional parse: sentinel / byte-majority logic -# --------------------------------------------------------------------------- - - class TestDataConditionalParse: """Tests for the 'other project types present' guard in Data.parse().""" - # -- helpers -- - def _big_csv(self, path, rows=500): """Write a CSV large enough to dominate byte counts.""" content = "id,value\n" + "\n".join(f"{i},{i * 2}" for i in range(rows)) path.write_text(content) - # -- pure data directories (no sentinels) -- - def test_pure_data_dir_no_sentinel(self, tmp_path): """No sentinel → Data always parsed regardless of byte ratios.""" (tmp_path / "data.csv").write_text("x\n1\n") @@ -248,10 +214,8 @@ def test_dvc_companion_not_a_sentinel(self, tmp_path): proj = _data_project(tmp_path) assert "data" in proj.specs - # -- mixed dirs where data dominates -- - def test_sentinel_present_data_majority(self, tmp_path): - """Sentinel present but data files are majority of bytes → Data parsed.""" + """Sentinel is present, but data files are the majority of bytes → Data parsed.""" self._big_csv(tmp_path / "data.csv") # large data file (tmp_path / "pyproject.toml").write_text( "[project]\nname='x'\n" @@ -299,7 +263,7 @@ def test_has_non_data_sentinels_true(self, tmp_path): (tmp_path / "data.csv").write_text("x\n1\n") (tmp_path / "pyproject.toml").write_text("") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") @@ -317,7 +281,7 @@ def test_has_non_data_sentinels_false(self, tmp_path): from 
projspec.proj.data_dir import Data (tmp_path / "data.csv").write_text("x\n1\n") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") @@ -336,7 +300,7 @@ def test_data_bytes_majority_true(self, tmp_path): self._big_csv(tmp_path / "data.csv") (tmp_path / "small.py").write_text("x = 1\n") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") @@ -351,7 +315,7 @@ def test_data_bytes_majority_false(self, tmp_path): (tmp_path / "main.py").write_text("x = 1\n" * 5000) (tmp_path / "tiny.csv").write_text("a\n1\n") - proj = projspec.Project.__new__(projspec.Project) + proj = object.__new__(projspec.Project) import fsspec proj.fs = fsspec.filesystem("file") diff --git a/tests/test_new_specs.py b/tests/test_new_specs.py new file mode 100644 index 0000000..f41a6c5 --- /dev/null +++ b/tests/test_new_specs.py @@ -0,0 +1,2018 @@ +"""Tests for new project spec types added in the bulk expansion. + +Structure +--------- +Each spec family gets one class with: + - test_match_positive – the spec IS detected given the right files + - test_match_negative – the spec is NOT detected without those files + - test_parse_contents – expected content keys are present after parse() + - test_parse_artifacts – expected artifact keys are present after parse() + - (where applicable) test_parse_detail – spot-check on specific parsed values + +Helper +------ +``make_spec(cls, tmpdir, files)`` writes *files* (dict of rel-path → text) into +*tmpdir* and returns a freshly constructed spec instance with _contents and +_artifacts pre-initialised to None (matching the state before parse() is called +by ProjectSpec.__init__). 
+""" + +import json +import os +import textwrap + +import pytest +import yaml + +import projspec + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def write_files(tmpdir, files: dict[str, str]) -> str: + """Write *files* into *tmpdir* and return the directory path.""" + path = str(tmpdir) + for rel, content in files.items(): + full = os.path.join(path, rel) + os.makedirs(os.path.dirname(full), exist_ok=True) + with open(full, "w") as f: + f.write(textwrap.dedent(content)) + return path + + +def make_proj(tmpdir, files: dict[str, str]): + path = write_files(tmpdir, files) + return projspec.Project(path) + + +def raw_spec(cls, proj): + """Instantiate a spec bypassing __init__'s match() call, for manual testing.""" + inst = cls.__new__(cls) + inst.proj = proj + inst._contents = None + inst._artifacts = None + return inst + + +# --------------------------------------------------------------------------- +# CI/CD specs +# --------------------------------------------------------------------------- + + +class TestGitHubActions: + FILES = { + ".github/workflows/ci.yml": """\ + name: CI + on: + push: + branches: [main] + jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import GitHubActions + + assert raw_spec(GitHubActions, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import GitHubActions + + assert not raw_spec(GitHubActions, proj).match() + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import GitHubActions + + spec = raw_spec(GitHubActions, proj) + spec.parse() + assert "ci_workflow" in spec._contents + + def test_parse_detail(self, tmpdir): + proj = make_proj(tmpdir, 
self.FILES) + from projspec.proj.cicd import GitHubActions + + spec = raw_spec(GitHubActions, proj) + spec.parse() + wf = list(spec._contents["ci_workflow"].values())[0] + assert wf.provider == "github" + assert "test" in wf.jobs + assert "push" in wf.triggers + + def test_multiple_workflows(self, tmpdir): + files = dict(self.FILES) + files[".github/workflows/release.yml"] = textwrap.dedent( + """\ + name: Release + on: [push] + jobs: + build: + runs-on: ubuntu-latest + steps: [] + """ + ) + proj = make_proj(tmpdir, files) + from projspec.proj.cicd import GitHubActions + + spec = raw_spec(GitHubActions, proj) + spec.parse() + assert len(spec._contents["ci_workflow"]) == 2 + + +class TestGitLabCI: + FILES = { + ".gitlab-ci.yml": """\ + stages: + - test + - deploy + test: + stage: test + script: + - pytest + deploy: + stage: deploy + script: + - echo deploy + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import GitLabCI + + assert raw_spec(GitLabCI, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import GitLabCI + + assert not raw_spec(GitLabCI, proj).match() + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import GitLabCI + + spec = raw_spec(GitLabCI, proj) + spec.parse() + wf = spec._contents["ci_workflow"] + assert wf.provider == "gitlab" + assert "test" in wf.jobs + assert "deploy" in wf.jobs + assert "test" in wf.triggers + + +class TestCircleCI: + FILES = { + ".circleci/config.yml": """\ + version: 2.1 + jobs: + build: + docker: + - image: cimg/python:3.11 + steps: + - checkout + - run: pytest + workflows: + main: + jobs: + - build + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import CircleCI + + assert raw_spec(CircleCI, proj).match() + + def test_match_negative(self, tmpdir): + proj = 
make_proj(tmpdir, {}) + from projspec.proj.cicd import CircleCI + + assert not raw_spec(CircleCI, proj).match() + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import CircleCI + + spec = raw_spec(CircleCI, proj) + spec.parse() + wf = spec._contents["ci_workflow"] + assert wf.provider == "circleci" + assert "build" in wf.jobs + + +class TestTaskfile: + FILES = { + "Taskfile.yml": """\ + version: '3' + tasks: + build: + desc: Build the project + cmds: + - echo building + test: + desc: Run tests + cmds: + - pytest + lint: + cmds: + - ruff check . + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import Taskfile + + assert raw_spec(Taskfile, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import Taskfile + + assert not raw_spec(Taskfile, proj).match() + + def test_match_variant_names(self, tmpdir): + for name in ("Taskfile.yaml", "taskfile.yml", "taskfile.yaml"): + proj = make_proj( + tmpdir, {name: "version: '3'\ntasks:\n x:\n cmds: [echo]\n"} + ) + from projspec.proj.cicd import Taskfile + + assert raw_spec(Taskfile, proj).match(), f"{name} should match" + + def test_parse_contents(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import Taskfile + + spec = raw_spec(Taskfile, proj) + spec.parse() + assert "build" in spec._contents["command"] + assert "test" in spec._contents["command"] + assert "lint" in spec._contents["command"] + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import Taskfile + + spec = raw_spec(Taskfile, proj) + spec.parse() + assert "build" in spec._artifacts["process"] + assert spec._artifacts["process"]["build"].cmd == ["task", "build"] + + +class TestJustFile: + FILES = { + "justfile": """\ + # Build the project + build: + cargo build --release + + # Run tests + 
test: + cargo test + + fmt: + cargo fmt + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import JustFile + + assert raw_spec(JustFile, proj).match() + + def test_match_Justfile_capitalised(self, tmpdir): + proj = make_proj(tmpdir, {"Justfile": "build:\n echo ok\n"}) + from projspec.proj.cicd import JustFile + + assert raw_spec(JustFile, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import JustFile + + assert not raw_spec(JustFile, proj).match() + + def test_parse_recipes(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.cicd import JustFile + + spec = raw_spec(JustFile, proj) + spec.parse() + assert "build" in spec._contents["command"] + assert "test" in spec._contents["command"] + assert "fmt" in spec._contents["command"] + assert spec._artifacts["process"]["build"].cmd == ["just", "build"] + + +class TestTox: + FILES_INI = { + "tox.ini": """\ + [tox] + envlist = py311, py312, lint + + [testenv] + deps = pytest + commands = pytest {posargs} + + [testenv:lint] + deps = ruff + commands = ruff check . 
+ """, + } + + FILES_PYPROJECT = { + "pyproject.toml": """\ + [tool.tox] + [tool.tox.env.py311] + commands = [["pytest"]] + [tool.tox.env.lint] + commands = [["ruff", "check", "."]] + """, + } + + def test_match_tox_ini(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_INI) + from projspec.proj.cicd import Tox + + assert raw_spec(Tox, proj).match() + + def test_match_pyproject(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PYPROJECT) + from projspec.proj.cicd import Tox + + assert raw_spec(Tox, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.cicd import Tox + + assert not raw_spec(Tox, proj).match() + + def test_parse_envlist(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_INI) + from projspec.proj.cicd import Tox + + spec = raw_spec(Tox, proj) + spec.parse() + assert "py311" in spec._artifacts["process"] + assert "py312" in spec._artifacts["process"] + assert "lint" in spec._artifacts["process"] + assert spec._artifacts["process"]["lint"].cmd == ["tox", "-e", "lint"] + + def test_parse_testenv_sections(self, tmpdir): + # tox.ini with named [testenv:X] sections but no envlist + proj = make_proj(tmpdir, {"tox.ini": "[testenv:unit]\ncommands=pytest\n"}) + from projspec.proj.cicd import Tox + + spec = raw_spec(Tox, proj) + spec.parse() + assert "unit" in spec._artifacts["process"] + + def test_parse_fallback_generic(self, tmpdir): + # tox.ini with no envlist and no [testenv:X] sections + proj = make_proj(tmpdir, {"tox.ini": "[tox]\n"}) + from projspec.proj.cicd import Tox + + spec = raw_spec(Tox, proj) + spec.parse() + assert "tox" in spec._artifacts["process"] + + +# --------------------------------------------------------------------------- +# Data / ML / Workflow specs +# --------------------------------------------------------------------------- + + +class TestDbt: + FILES = { + "dbt_project.yml": """\ + name: 'analytics' + version: '1.0.0' + config-version: 2 + profile: 'default' + 
model-paths: ['models'] + """, + "models/example.sql": "SELECT 1 AS id", + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + assert raw_spec(Dbt, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Dbt + + assert not raw_spec(Dbt, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + spec = raw_spec(Dbt, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["name"] == "analytics" + assert meta["profile"] == "default" + + def test_parse_standard_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + spec = raw_spec(Dbt, proj) + spec.parse() + for cmd in ("run", "test", "build", "compile", "seed"): + assert cmd in spec._contents["command"], f"missing command: {cmd}" + assert cmd in spec._artifacts["process"], f"missing artifact: {cmd}" + + def test_parse_command_values(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Dbt + + spec = raw_spec(Dbt, proj) + spec.parse() + assert spec._contents["command"]["run"].cmd == ["dbt", "run"] + assert spec._artifacts["process"]["build"].cmd == ["dbt", "build"] + + +class TestQuarto: + FILES_PROJECT = { + "_quarto.yml": """\ + project: + type: website + title: My Quarto Site + output-dir: _site + format: + html: + theme: cosmo + """, + "index.qmd": "---\ntitle: Home\n---\nHello!\n", + } + + FILES_SINGLE_QMD = { + "report.qmd": "---\ntitle: Report\n---\nContent here.\n", + } + + def test_match_quarto_yml(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + assert raw_spec(Quarto, proj).match() + + def test_match_qmd_file(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_SINGLE_QMD) + 
from projspec.proj.dataworkflows import Quarto + + assert raw_spec(Quarto, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Quarto + + assert not raw_spec(Quarto, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + spec = raw_spec(Quarto, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["title"] == "My Quarto Site" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + spec = raw_spec(Quarto, proj) + spec.parse() + assert "render" in spec._artifacts + assert "preview" in spec._artifacts + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["render"], StaticSite) + assert isinstance(spec._artifacts["preview"], Server) + + def test_parse_custom_output_dir(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PROJECT) + from projspec.proj.dataworkflows import Quarto + + spec = raw_spec(Quarto, proj) + spec.parse() + assert "_site" in spec._artifacts["render"].fn + + +class TestNox: + FILES = { + "noxfile.py": """\ + import nox + + @nox.session + def tests(session): + session.install('pytest') + session.run('pytest') + + @nox.session(python=['3.11', '3.12']) + def lint(session): + session.install('ruff') + session.run('ruff', 'check', '.') + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Nox + + assert raw_spec(Nox, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Nox + + assert not raw_spec(Nox, proj).match() + + def test_parse_sessions(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from 
projspec.proj.dataworkflows import Nox + + spec = raw_spec(Nox, proj) + spec.parse() + assert "tests" in spec._artifacts["process"] + assert "lint" in spec._artifacts["process"] + assert spec._artifacts["process"]["tests"].cmd == ["nox", "-s", "tests"] + + def test_parse_empty_noxfile_fallback(self, tmpdir): + proj = make_proj(tmpdir, {"noxfile.py": "# no sessions\n"}) + from projspec.proj.dataworkflows import Nox + + spec = raw_spec(Nox, proj) + spec.parse() + # Falls back to generic nox command + assert "nox" in spec._artifacts["process"] + + +class TestPrefect: + FILES = { + "prefect.yaml": """\ + name: my-pipeline + deployments: + - name: daily-etl + entrypoint: flows/etl.py:run_etl + - name: weekly-report + entrypoint: flows/report.py:run_report + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + assert raw_spec(Prefect, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Prefect + + assert not raw_spec(Prefect, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + spec = raw_spec(Prefect, proj) + spec.parse() + assert spec._contents["descriptive_metadata"].meta["name"] == "my-pipeline" + + def test_parse_deployments_as_stages(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + spec = raw_spec(Prefect, proj) + spec.parse() + assert "daily-etl" in spec._contents["pipeline_stage"] + assert "weekly-report" in spec._contents["pipeline_stage"] + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Prefect + + spec = raw_spec(Prefect, proj) + spec.parse() + assert "run" in spec._artifacts["process"] + + +class TestSnakemake: + FILES = { + "Snakefile": """\ + rule all: + input: 
"results/output.txt" + + rule process: + input: "data/input.txt" + output: "results/output.txt" + shell: "cat {input} > {output}" + + rule download: + output: "data/input.txt" + shell: "echo hello > {output}" + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Snakemake + + assert raw_spec(Snakemake, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Snakemake + + assert not raw_spec(Snakemake, proj).match() + + def test_parse_rules_as_stages(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Snakemake + + spec = raw_spec(Snakemake, proj) + spec.parse() + stages = spec._contents.get("pipeline_stage", {}) + # 'all' is filtered out; process and download should appear + assert "process" in stages + assert "download" in stages + assert "all" not in stages + + def test_parse_run_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Snakemake + + spec = raw_spec(Snakemake, proj) + spec.parse() + assert "run" in spec._artifacts["process"] + assert spec._artifacts["process"]["run"].cmd == [ + "snakemake", + "--cores", + "all", + ] + + +class TestAirflow: + FILES = { + "dags/etl_dag.py": """\ + from airflow import DAG + from airflow.operators.python import PythonOperator + + dag = DAG(dag_id='etl_pipeline', schedule='@daily') + """, + "dags/report_dag.py": """\ + from airflow import DAG + dag = DAG(dag_id='weekly_report', schedule='@weekly') + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Airflow + + assert raw_spec(Airflow, proj).match() + + def test_match_negative_no_dags(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Airflow + + assert not raw_spec(Airflow, proj).match() + + def 
test_match_negative_empty_dags(self, tmpdir): + # dags/ exists but no .py files + os.makedirs(str(tmpdir.join("dags")), exist_ok=True) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Airflow + + assert not raw_spec(Airflow, proj).match() + + def test_parse_dag_ids_as_stages(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Airflow + + spec = raw_spec(Airflow, proj) + spec.parse() + stages = spec._contents.get("pipeline_stage", {}) + assert "etl_pipeline" in stages + assert "weekly_report" in stages + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Airflow + + spec = raw_spec(Airflow, proj) + spec.parse() + assert "standalone" in spec._contents["command"] + assert "webserver" in spec._contents["command"] + + +class TestKedro: + FILES = { + "pyproject.toml": """\ + [tool.kedro] + package_name = "my_project" + project_name = "My Project" + kedro_init_version = "0.19.0" + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Kedro + + assert raw_spec(Kedro, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Kedro + + assert not raw_spec(Kedro, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Kedro + + spec = raw_spec(Kedro, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["package_name"] == "my_project" + + def test_parse_default_run_command(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.dataworkflows import Kedro + + spec = raw_spec(Kedro, proj) + spec.parse() + assert "run" in spec._contents["command"] + assert "run" in spec._artifacts + + def test_parse_pipeline_discovery(self, tmpdir): + # Create pipeline 
directories under src//pipelines/ + files = dict(self.FILES) + files["src/my_project/pipelines/ingestion/__init__.py"] = "" + files["src/my_project/pipelines/processing/__init__.py"] = "" + proj = make_proj(tmpdir, files) + from projspec.proj.dataworkflows import Kedro + + spec = raw_spec(Kedro, proj) + spec.parse() + assert "ingestion" in spec._contents.get("pipeline_stage", {}) + assert "processing" in spec._contents.get("pipeline_stage", {}) + + +class TestDagster: + FILES_PYPROJECT = { + "pyproject.toml": """\ + [tool.dagster] + module_name = "my_assets" + """, + } + + FILES_YAML = { + "dagster.yaml": "telemetry:\n enabled: false\n", + } + + def test_match_pyproject(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PYPROJECT) + from projspec.proj.dataworkflows import Dagster + + assert raw_spec(Dagster, proj).match() + + def test_match_dagster_yaml(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_YAML) + from projspec.proj.dataworkflows import Dagster + + assert raw_spec(Dagster, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.dataworkflows import Dagster + + assert not raw_spec(Dagster, proj).match() + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PYPROJECT) + from projspec.proj.dataworkflows import Dagster + + spec = raw_spec(Dagster, proj) + spec.parse() + assert "dev" in spec._artifacts + assert "materialize" in spec._artifacts + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["dev"], Server) + + +# --------------------------------------------------------------------------- +# Documentation specs +# --------------------------------------------------------------------------- + + +class TestMkDocs: + FILES = { + "mkdocs.yml": """\ + site_name: My Project Docs + site_description: Documentation for my project + site_author: Alice + docs_dir: docs + site_dir: site + nav: + - Home: index.md + theme: + name: material + """, + 
"docs/index.md": "# Welcome\n", + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + assert raw_spec(MkDocs, proj).match() + + def test_match_yaml_extension(self, tmpdir): + proj = make_proj(tmpdir, {"mkdocs.yaml": "site_name: X\n"}) + from projspec.proj.documentation import MkDocs + + assert raw_spec(MkDocs, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.documentation import MkDocs + + assert not raw_spec(MkDocs, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["site_name"] == "My Project Docs" + assert meta["site_author"] == "Alice" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + assert "docs" in spec._artifacts + assert "serve" in spec._artifacts + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["docs"], StaticSite) + assert isinstance(spec._artifacts["serve"], Server) + + def test_parse_output_path(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + assert "site" in spec._artifacts["docs"].fn + + def test_parse_custom_site_dir(self, tmpdir): + proj = make_proj(tmpdir, {"mkdocs.yml": "site_name: X\nsite_dir: public\n"}) + from projspec.proj.documentation import MkDocs + + spec = raw_spec(MkDocs, proj) + spec.parse() + assert "public" in spec._artifacts["docs"].fn + + +class TestSphinx: + FILES_ROOT = { + "conf.py": """\ + project = "MyLib" + author = "Bob" + release = "1.2.3" + extensions = [] 
+ html_theme = "alabaster" + """, + "index.rst": ".. toctree::\n intro\n", + } + + FILES_DOCS_DIR = { + "docs/conf.py": """\ + project = "MyLib" + author = "Carol" + release = "0.1" + extensions = [] + html_theme = "furo" + """, + "docs/index.rst": "Content\n", + } + + def test_match_root_conf(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROOT) + from projspec.proj.documentation import Sphinx + + assert raw_spec(Sphinx, proj).match() + + def test_match_docs_conf(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_DOCS_DIR) + from projspec.proj.documentation import Sphinx + + assert raw_spec(Sphinx, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.documentation import Sphinx + + assert not raw_spec(Sphinx, proj).match() + + def test_parse_metadata_root(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROOT) + from projspec.proj.documentation import Sphinx + + spec = raw_spec(Sphinx, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["project"] == "MyLib" + assert meta["author"] == "Bob" + assert meta["release"] == "1.2.3" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROOT) + from projspec.proj.documentation import Sphinx + + spec = raw_spec(Sphinx, proj) + spec.parse() + assert "docs" in spec._artifacts + assert "autobuild" in spec._artifacts + from projspec.artifact.infra import StaticSite + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["docs"], StaticSite) + assert isinstance(spec._artifacts["autobuild"], Server) + + def test_parse_docs_dir_layout(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_DOCS_DIR) + from projspec.proj.documentation import Sphinx + + spec = raw_spec(Sphinx, proj) + spec.parse() + assert "docs" in spec._artifacts["docs"].fn + + +# --------------------------------------------------------------------------- +# Infrastructure specs +# 
--------------------------------------------------------------------------- + + +class TestDockerCompose: + FILES = { + "docker-compose.yml": """\ + name: myapp + services: + web: + image: nginx:latest + ports: + - "8080:80" + db: + image: postgres:15 + environment: + POSTGRES_PASSWORD: secret + cache: + image: redis:7 + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + assert raw_spec(DockerCompose, proj).match() + + def test_match_compose_yaml(self, tmpdir): + proj = make_proj( + tmpdir, {"compose.yaml": "services:\n app:\n image: alpine\n"} + ) + from projspec.proj.infra import DockerCompose + + assert raw_spec(DockerCompose, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import DockerCompose + + assert not raw_spec(DockerCompose, proj).match() + + def test_parse_services_as_dependencies(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + deps = spec._contents["service_dependency"] + assert "web" in deps + assert "db" in deps + assert "cache" in deps + + def test_parse_service_details(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + db = spec._contents["service_dependency"]["db"] + assert db.image == "postgres:15" + assert db.service_type == "postgres" + assert db.version == "15" + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["name"] == "myapp" + assert "web" in meta["services"] + + def test_parse_stack_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from 
projspec.proj.infra import DockerCompose + + spec = raw_spec(DockerCompose, proj) + spec.parse() + from projspec.artifact.infra import ComposeStack + + assert isinstance(spec._artifacts["stack"], ComposeStack) + assert "docker-compose.yml" in spec._artifacts["stack"].compose_file + + +class TestTerraform: + FILES = { + "main.tf": """\ + terraform { + required_version = ">= 1.5" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + } + + resource "aws_s3_bucket" "data" { + bucket = "my-data-bucket" + } + + resource "aws_lambda_function" "handler" { + function_name = "my-handler" + role = "arn:aws:iam::123:role/role" + handler = "index.handler" + runtime = "python3.11" + } + """, + "variables.tf": 'variable "region" {\n default = "us-east-1"\n}\n', + "outputs.tf": "", + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + assert raw_spec(Terraform, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Terraform + + assert not raw_spec(Terraform, proj).match() + + def test_parse_resource_types(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + spec = raw_spec(Terraform, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert "aws_s3_bucket" in meta["resource_types"] + assert "aws_lambda_function" in meta["resource_types"] + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + spec = raw_spec(Terraform, proj) + spec.parse() + for cmd in ("init", "validate", "apply", "destroy"): + assert cmd in spec._contents["command"] + assert cmd in spec._artifacts + + def test_parse_plan_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Terraform + + spec = raw_spec(Terraform, proj) + spec.parse() + from 
projspec.artifact.infra import TerraformPlan + + assert isinstance(spec._artifacts["plan"], TerraformPlan) + assert "plan.tfplan" in spec._artifacts["plan"].fn + + +class TestAnsible: + FILES_PLAYBOOK = { + "playbook.yml": """\ + --- + - name: Configure webservers + hosts: webservers + tasks: + - name: Install nginx + apt: + name: nginx + state: present + """, + "inventory": "webserver1 ansible_host=192.168.1.1\n", + } + + FILES_ROLES = { + "site.yml": "---\n- hosts: all\n roles:\n - common\n", + "roles/common/tasks/main.yml": "---\n- name: update\n apt: update_cache=yes\n", + } + + def test_match_playbook(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PLAYBOOK) + from projspec.proj.infra import Ansible + + assert raw_spec(Ansible, proj).match() + + def test_match_ansible_cfg(self, tmpdir): + proj = make_proj(tmpdir, {"ansible.cfg": "[defaults]\ninventory = inventory\n"}) + from projspec.proj.infra import Ansible + + assert raw_spec(Ansible, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Ansible + + assert not raw_spec(Ansible, proj).match() + + def test_parse_playbook_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_PLAYBOOK) + from projspec.proj.infra import Ansible + + spec = raw_spec(Ansible, proj) + spec.parse() + assert "playbook" in spec._contents["command"] + assert spec._contents["command"]["playbook"].cmd == [ + "ansible-playbook", + "playbook.yml", + ] + + def test_parse_site_yml(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_ROLES) + from projspec.proj.infra import Ansible + + spec = raw_spec(Ansible, proj) + spec.parse() + assert "site" in spec._contents["command"] + + +class TestPulumi: + FILES = { + "Pulumi.yaml": """\ + name: my-infra + runtime: python + description: Cloud infrastructure for my-infra + """, + } + + FILES_DICT_RUNTIME = { + "Pulumi.yaml": """\ + name: my-infra + runtime: + name: python + options: + virtualenv: venv + description: Uses 
dict runtime + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + assert raw_spec(Pulumi, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Pulumi + + assert not raw_spec(Pulumi, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["name"] == "my-infra" + assert meta["runtime"] == "python" + + def test_parse_metadata_dict_runtime(self, tmpdir): + proj = make_proj(tmpdir, self.FILES_DICT_RUNTIME) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["runtime"] == "python" + + def test_parse_artifacts(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + from projspec.artifact.deployment import Deployment + + assert isinstance(spec._artifacts["deploy"], Deployment) + assert "preview" in spec._artifacts + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Pulumi + + spec = raw_spec(Pulumi, proj) + spec.parse() + assert "up" in spec._contents["command"] + assert "destroy" in spec._contents["command"] + assert spec._contents["command"]["up"].cmd == ["pulumi", "up", "--yes"] + + +class TestCDK: + FILES = { + "cdk.json": json.dumps( + { + "app": "npx ts-node --prefer-ts-exts bin/app.ts", + "context": {"@aws-cdk/core:enableStackNameDuplicates": True}, + } + ), + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + assert raw_spec(CDK, proj).match() + + def test_match_negative(self, tmpdir): + proj 
= make_proj(tmpdir, {}) + from projspec.proj.infra import CDK + + assert not raw_spec(CDK, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + spec = raw_spec(CDK, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert "ts-node" in meta["app"] + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + spec = raw_spec(CDK, proj) + spec.parse() + for cmd in ("synth", "deploy", "destroy", "diff"): + assert cmd in spec._contents["command"] + + def test_parse_deploy_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import CDK + + spec = raw_spec(CDK, proj) + spec.parse() + from projspec.artifact.deployment import Deployment + + assert isinstance(spec._artifacts["deploy"], Deployment) + + +class TestEarthfile: + FILES = { + "Earthfile": """\ + VERSION 0.8 + + build: + FROM golang:1.21 + RUN go build ./... + + test: + FROM +build + RUN go test ./... + + docker: + FROM alpine:latest + COPY +build/app /app + ENTRYPOINT ["/app"] + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Earthfile + + assert raw_spec(Earthfile, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Earthfile + + assert not raw_spec(Earthfile, proj).match() + + def test_parse_targets(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Earthfile + + spec = raw_spec(Earthfile, proj) + spec.parse() + assert "build" in spec._contents["command"] + assert "test" in spec._contents["command"] + assert "docker" in spec._contents["command"] + assert spec._contents["command"]["build"].cmd == ["earthly", "+build"] + + def test_parse_uppercase_directives_filtered(self, tmpdir): + # VERSION, FROM, RUN etc. 
should not appear as targets + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Earthfile + + spec = raw_spec(Earthfile, proj) + spec.parse() + for key in spec._contents.get("command", {}): + assert ( + key == key.lower() or not key.isupper() + ), f"All-caps directive '{key}' should be filtered out" + + +class TestNixpacks: + FILES = { + "nixpacks.toml": """\ + [phases.setup] + nixPkgs = ['python311', 'poetry'] + + [phases.install] + cmds = ['poetry install --no-dev'] + + [start] + cmd = 'uvicorn app:app --host 0.0.0.0' + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Nixpacks + + assert raw_spec(Nixpacks, proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Nixpacks + + assert not raw_spec(Nixpacks, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Nixpacks + + spec = raw_spec(Nixpacks, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert "setup" in meta["phases"] + assert "install" in meta["phases"] + assert "uvicorn" in meta["start_cmd"] + + def test_parse_docker_image_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Nixpacks + + spec = raw_spec(Nixpacks, proj) + spec.parse() + from projspec.artifact.process import Process + + assert isinstance(spec._artifacts["build"], Process) + assert "nixpacks" in spec._artifacts["build"].cmd[0] + + +class TestVagrant: + FILES = { + "Vagrantfile": """\ + Vagrant.configure("2") do |config| + config.vm.box = "ubuntu/jammy64" + config.vm.hostname = "dev-server" + config.vm.provider "virtualbox" do |vb| + vb.memory = "2048" + end + end + """, + } + + def test_match_positive(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + assert raw_spec(Vagrant, 
proj).match() + + def test_match_negative(self, tmpdir): + proj = make_proj(tmpdir, {}) + from projspec.proj.infra import Vagrant + + assert not raw_spec(Vagrant, proj).match() + + def test_parse_metadata(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + spec = raw_spec(Vagrant, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["box"] == "ubuntu/jammy64" + assert meta["hostname"] == "dev-server" + + def test_parse_commands(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + spec = raw_spec(Vagrant, proj) + spec.parse() + for cmd in ("up", "halt", "destroy", "ssh"): + assert cmd in spec._contents["command"] + assert spec._contents["command"]["up"].cmd == ["vagrant", "up"] + + def test_parse_vm_artifact(self, tmpdir): + proj = make_proj(tmpdir, self.FILES) + from projspec.proj.infra import Vagrant + + spec = raw_spec(Vagrant, proj) + spec.parse() + from projspec.artifact.process import Server + + assert isinstance(spec._artifacts["vm"], Server) + + +# --------------------------------------------------------------------------- +# Web framework specs (scan-based, no _create) +# --------------------------------------------------------------------------- + + +class TestGradio: + GRADIO_APP = """\ + import gradio as gr + + def predict(text): + return text.upper() + + demo = gr.Interface(fn=predict, inputs="text", outputs="text") + + if __name__ == "__main__": + demo.launch() + """ + + GRADIO_BLOCKS = """\ + import gradio as gr + + with gr.Blocks() as demo: + gr.Markdown("Hello!") + + demo.launch() + """ + + def test_match_positive(self, tmpdir): + write_files(tmpdir, {"app.py": self.GRADIO_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + assert raw_spec(Gradio, proj).match() + + def test_match_negative(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio 
+ + assert not raw_spec(Gradio, proj).match() + + def test_parse_interface(self, tmpdir): + write_files(tmpdir, {"app.py": self.GRADIO_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + spec.parse() + assert "server" in spec._artifacts + assert "app" in spec._artifacts["server"] + + def test_parse_blocks(self, tmpdir): + write_files(tmpdir, {"demo.py": self.GRADIO_BLOCKS}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + spec.parse() + assert "demo" in spec._artifacts["server"] + + def test_parse_command_uses_python(self, tmpdir): + write_files(tmpdir, {"app.py": self.GRADIO_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + spec.parse() + cmd = spec._artifacts["server"]["app"].cmd + assert cmd[0] == "python" + + def test_parse_non_gradio_ignored(self, tmpdir): + write_files(tmpdir, {"app.py": "import flask\napp = Flask(__name__)\n"}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Gradio + + spec = raw_spec(Gradio, proj) + from projspec.proj.base import ParseFailed + + with pytest.raises(ParseFailed): + spec.parse() + + +class TestShiny: + SHINY_APP = """\ + from shiny import App, render, ui + + app_ui = ui.page_fluid( + ui.input_text("name", "Name:"), + ui.output_text_verbatim("greeting"), + ) + + def server(input, output, session): + @render.text + def greeting(): + return f"Hello, {input.name()}!" 
+ + app = App(app_ui, server) + """ + + def test_match_positive(self, tmpdir): + write_files(tmpdir, {"app.py": self.SHINY_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + assert raw_spec(Shiny, proj).match() + + def test_match_negative(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + assert not raw_spec(Shiny, proj).match() + + def test_parse_server(self, tmpdir): + write_files(tmpdir, {"app.py": self.SHINY_APP}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + spec = raw_spec(Shiny, proj) + spec.parse() + assert "server" in spec._artifacts + assert "app" in spec._artifacts["server"] + cmd = spec._artifacts["server"]["app"].cmd + assert cmd[0] == "shiny" + assert "run" in cmd + + def test_parse_non_shiny_ignored(self, tmpdir): + write_files(tmpdir, {"app.py": "import streamlit as st\nst.write('hello')\n"}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.webapp import Shiny + + spec = raw_spec(Shiny, proj) + from projspec.proj.base import ParseFailed + + with pytest.raises(ParseFailed): + spec.parse() + + +# --------------------------------------------------------------------------- +# New content types +# --------------------------------------------------------------------------- + + +class TestCIWorkflow: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import CIWorkflow + + wf = CIWorkflow( + proj=proj, + name="My Workflow", + triggers=["push", "pull_request"], + jobs=["build", "test"], + provider="github", + ) + assert wf.name == "My Workflow" + assert "push" in wf.triggers + assert "build" in wf.jobs + assert wf.provider == "github" + + def test_to_dict(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import CIWorkflow + + wf = CIWorkflow( + proj=proj, + name="CI", + triggers=["push"], + jobs=["test"], + provider="github", + ) + d = 
wf.to_dict(compact=True) + assert d["name"] == "CI" + assert d["provider"] == "github" + + def test_registered(self): + from projspec.content.base import registry + + assert "c_i_workflow" in registry + + +class TestPipelineStage: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import PipelineStage + + stage = PipelineStage( + proj=proj, + name="process", + cmd=["snakemake", "process"], + depends_on=["download"], + ) + assert stage.name == "process" + assert stage.cmd == ["snakemake", "process"] + assert "download" in stage.depends_on + + def test_registered(self): + from projspec.content.base import registry + + assert "pipeline_stage" in registry + + +class TestServiceDependency: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.content.cicd import ServiceDependency + + svc = ServiceDependency( + proj=proj, + name="db", + service_type="postgres", + version="15", + image="postgres:15", + ) + assert svc.name == "db" + assert svc.service_type == "postgres" + assert svc.version == "15" + + def test_registered(self): + from projspec.content.base import registry + + assert "service_dependency" in registry + + +# --------------------------------------------------------------------------- +# New artifact types +# --------------------------------------------------------------------------- + + +class TestComposeStack: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import ComposeStack + + stack = ComposeStack(proj=proj, file="docker-compose.yml") + assert stack.compose_file == "docker-compose.yml" + assert "docker" in stack.cmd[0] + assert "compose" in stack.cmd + + def test_registered(self): + from projspec.artifact.base import registry + + assert "compose_stack" in registry + + def test_state_unknown_remote(self, tmpdir): + from projspec.artifact.infra import ComposeStack + from unittest.mock import MagicMock + + # Simulate a 
remote (non-LocalFileSystem) project + proj = projspec.Project.__new__(projspec.Project) + mock_fs = MagicMock() + mock_fs.__class__.__name__ = "S3FileSystem" + # is_local() uses isinstance check against LocalFileSystem + import fsspec.implementations.local + + mock_fs.__class__ = type("S3FileSystem", (), {}) + proj.fs = mock_fs + proj.url = "bucket/prefix" + stack = ComposeStack(proj=proj) + # Remote project: state should be "" (unknown) + assert stack.state == "" + + +class TestStaticSite: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import StaticSite + + site = StaticSite( + proj=proj, cmd=["mkdocs", "build"], fn="/path/site/index.html" + ) + assert site.fn == "/path/site/index.html" + assert site.cmd == ["mkdocs", "build"] + + def test_registered(self): + from projspec.artifact.base import registry + + assert "static_site" in registry + + def test_is_done_when_file_exists(self, tmpdir): + path = str(tmpdir) + os.makedirs(os.path.join(path, "site")) + index = os.path.join(path, "site", "index.html") + open(index, "w").close() + proj = projspec.Project(path) + from projspec.artifact.infra import StaticSite + + site = StaticSite(proj=proj, cmd=["mkdocs", "build"], fn=index) + assert site._is_done() + + def test_is_clean_when_file_absent(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import StaticSite + + site = StaticSite( + proj=proj, + cmd=["mkdocs", "build"], + fn=str(tmpdir.join("site/index.html")), + ) + assert site._is_clean() + + +class TestTerraformPlan: + def test_fields(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import TerraformPlan + + plan = TerraformPlan(proj=proj) + assert "plan.tfplan" in plan.fn + assert plan.cmd == ["terraform", "plan", "-out", "plan.tfplan"] + + def test_custom_plan_file(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.artifact.infra import TerraformPlan + + plan = 
TerraformPlan(proj=proj, plan_file="infra.tfplan") + assert "infra.tfplan" in plan.fn + assert "infra.tfplan" in plan.cmd + + def test_registered(self): + from projspec.artifact.base import registry + + assert "terraform_plan" in registry + + +# --------------------------------------------------------------------------- +# Metaflow +# --------------------------------------------------------------------------- + +HELLO_FLOW = """\ +from metaflow import FlowSpec, step + + +class HelloFlow(FlowSpec): + @step + def start(self): + print("Hello!") + self.next(self.end) + + @step + def end(self): + print("Done.") + + +if __name__ == "__main__": + HelloFlow() +""" + +TRAIN_FLOW = """\ +from metaflow import FlowSpec, step, project, schedule, Parameter + + +@project(name="my_ml_project") +@schedule(daily=True) +class TrainFlow(FlowSpec): + learning_rate = Parameter("lr", default=0.01) + + @step + def start(self): + self.next(self.train) + + @step + def train(self): + print(f"Training with lr={self.learning_rate}") + self.next(self.end) + + @step + def end(self): + print("Training complete") + + +if __name__ == "__main__": + TrainFlow() +""" + +NOT_METAFLOW = """\ +import pandas as pd + +def process(): + return pd.DataFrame({"a": [1, 2, 3]}) +""" + + +class TestMetaflow: + def test_match_positive(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert raw_spec(Metaflow, proj).match() + + def test_match_negative_no_py(self, tmpdir): + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert not raw_spec(Metaflow, proj).match() + + def test_match_negative_non_metaflow_py(self, tmpdir): + write_files(tmpdir, {"script.py": NOT_METAFLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert not raw_spec(Metaflow, proj).match() + + def test_match_requires_both_import_and_flowspec(self, 
tmpdir): + # import present but no FlowSpec subclass + write_files( + tmpdir, + {"util.py": "from metaflow import Parameter\nx = Parameter('n')\n"}, + ) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + assert not raw_spec(Metaflow, proj).match() + + def test_parse_run_command(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + assert "flow" in spec._contents["command"] + assert spec._contents["command"]["flow"].cmd == [ + "python", + "flow.py", + "run", + ] + + def test_parse_process_artifact(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + assert "flow" in spec._artifacts["process"] + assert spec._artifacts["process"]["flow"].cmd == [ + "python", + "flow.py", + "run", + ] + + def test_parse_step_names_as_pipeline_stages(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + stages = spec._contents.get("pipeline_stage", {}) + assert "flow.start" in stages + assert "flow.end" in stages + + def test_parse_project_name_from_decorator(self, tmpdir): + write_files(tmpdir, {"train.py": TRAIN_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + meta = spec._contents["descriptive_metadata"].meta + assert meta["project"] == "my_ml_project" + + def test_parse_deployment_artifacts_when_scheduled(self, tmpdir): + write_files(tmpdir, {"train.py": TRAIN_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + 
spec.parse() + procs = spec._artifacts["process"] + assert "train.argo_create" in procs + assert "train.step_functions_create" in procs + assert procs["train.argo_create"].cmd == [ + "python", + "train.py", + "argo-workflows", + "create", + ] + assert procs["train.step_functions_create"].cmd == [ + "python", + "train.py", + "step-functions", + "create", + ] + + def test_parse_no_deployment_artifacts_without_schedule(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + procs = spec._artifacts["process"] + assert not any("argo" in k or "step_functions" in k for k in procs) + + def test_parse_multiple_flows(self, tmpdir): + write_files(tmpdir, {"flow.py": HELLO_FLOW, "train.py": TRAIN_FLOW}) + proj = projspec.Project(str(tmpdir)) + from projspec.proj.dataworkflows import Metaflow + + spec = raw_spec(Metaflow, proj) + spec.parse() + assert "flow" in spec._contents["command"] + assert "train" in spec._contents["command"] + + def test_create_writes_flow_file(self, tmpdir): + path = str(tmpdir) + from projspec.proj.dataworkflows import Metaflow + + Metaflow._create(path) + flow_file = os.path.join(path, "flow.py") + assert os.path.exists(flow_file) + content = open(flow_file).read() + assert "FlowSpec" in content + assert "@step" in content + assert "def start" in content + assert "def end" in content + assert "if __name__" in content + + def test_create_flow_class_name_derived_from_dir(self, tmpdir): + path = str(tmpdir.mkdir("my_pipeline")) + from projspec.proj.dataworkflows import Metaflow + + Metaflow._create(path) + content = open(os.path.join(path, "flow.py")).read() + assert "MyPipelineFlow" in content + + def test_roundtrip_create_and_detect(self, tmpdir): + """create() produces files that match() and parse() accept.""" + path = str(tmpdir) + proj = projspec.Project(path) + proj.create("Metaflow") + # Re-scan so 
scanned_files picks up the new flow.py + proj2 = projspec.Project(path) + assert "metaflow" in proj2 diff --git a/tests/test_roundtrips.py b/tests/test_roundtrips.py index 48d4985..fa2cc33 100644 --- a/tests/test_roundtrips.py +++ b/tests/test_roundtrips.py @@ -36,6 +36,29 @@ "MDBook", "RTD", "BackstageCatalog", + # CI/CD — file-only _create() + "GitHubActions", + "GitLabCI", + "CircleCI", + "Taskfile", + "JustFile", + "Tox", + # Data / ML workflows — file-only _create() + "Dbt", + "Quarto", + "Nox", + # Documentation — file-only _create() + "MkDocs", + "Sphinx", + # Infrastructure — file-only _create() + "DockerCompose", + "Terraform", + "Ansible", + "Pulumi", + "CDK", + "Earthfile", + "Nixpacks", + "Vagrant", ], ) def test_compliant(tmpdir, cls_name): diff --git a/tests/test_tools.py b/tests/test_tools.py new file mode 100644 index 0000000..2bf1123 --- /dev/null +++ b/tests/test_tools.py @@ -0,0 +1,356 @@ +"""Tests for projspec.tools.choose_install_method and install_tool.""" + +import subprocess +import sys +from contextlib import contextmanager +from unittest.mock import patch, MagicMock + +import pytest + +import projspec.tools as tools +from projspec.tools import ( + ToolInfo, + TOOLS, + _is_url, + _is_shell_string, + _leading_executable, + _method_is_viable, + _rank_install_string, + _preferred_install_methods, + choose_install_method, + install_tool, +) +from projspec.config import temp_conf + + +# --------------------------------------------------------------------------- +# Helpers for patching is_installed +# --------------------------------------------------------------------------- + + +@contextmanager +def installed(*executables: str): + """Context manager: make is_installed report only *executables* as present.""" + exe_set = set(executables) + with patch.object( + tools.is_installed, "exists", side_effect=lambda x, **kw: x in exe_set + ): + yield + + +@contextmanager +def nothing_installed(): + """Context manager: make is_installed report nothing as 
present.""" + with patch.object(tools.is_installed, "exists", return_value=False): + yield + + +# --------------------------------------------------------------------------- +# Classification helpers +# --------------------------------------------------------------------------- + + +class TestIsUrl: + def test_https(self): + assert _is_url("https://example.com/install.sh") + + def test_http(self): + assert _is_url("http://example.com") + + def test_pip_not_url(self): + assert not _is_url("pip install foo") + + def test_curl_not_url(self): + assert not _is_url("curl -sSL https://example.com | sh") + + +class TestIsShellString: + def test_pipe(self): + assert _is_shell_string("curl -sSL https://x.sh | sh") + + def test_redirect(self): + assert _is_shell_string("echo y > /tmp/x") + + def test_and_and(self): + assert _is_shell_string("cd /tmp && ./install.sh") + + def test_plain_pip(self): + assert not _is_shell_string("pip install uv") + + def test_plain_brew(self): + assert not _is_shell_string("brew install uv") + + +class TestLeadingExecutable: + def test_pip(self): + assert _leading_executable("pip install foo") == "pip" + + def test_curl(self): + assert _leading_executable("curl -sSL https://example.com | sh") == "curl" + + def test_empty(self): + assert _leading_executable("") == "" + + +# --------------------------------------------------------------------------- +# _method_is_viable +# --------------------------------------------------------------------------- + + +class TestMethodIsViable: + def test_url_never_viable(self): + assert not _method_is_viable("https://example.com/install") + + def test_winget_only_on_windows(self): + with patch.object(tools, "_IS_POSIX", True): + assert not _method_is_viable("winget install --id=foo.Bar") + with patch.object(tools, "_IS_POSIX", False): + with installed("winget"): + assert _method_is_viable("winget install --id=foo.Bar") + + def test_shell_string_requires_posix(self): + with patch.object(tools, "_IS_POSIX", False): 
+ with installed("curl"): + assert not _method_is_viable("curl -sSL https://x.sh | sh") + + def test_shell_string_requires_leading_executable_present(self): + with patch.object(tools, "_IS_POSIX", True): + with nothing_installed(): + assert not _method_is_viable("curl -sSL https://x.sh | sh") + with installed("curl"): + assert _method_is_viable("curl -sSL https://x.sh | sh") + + def test_plain_command_needs_executable_on_path(self): + with nothing_installed(): + assert not _method_is_viable("pip install foo") + with installed("pip"): + assert _method_is_viable("pip install foo") + + def test_brew_needs_brew_present(self): + with nothing_installed(): + assert not _method_is_viable("brew install foo") + with installed("brew"): + assert _method_is_viable("brew install foo") + + +# --------------------------------------------------------------------------- +# _rank_install_string +# --------------------------------------------------------------------------- + + +class TestRankInstallString: + def test_early_preference_ranks_lower(self): + prefs = ["uv", "conda", "pip"] + assert _rank_install_string("uv add foo", prefs) < _rank_install_string( + "pip install foo", prefs + ) + assert _rank_install_string("conda install foo", prefs) < _rank_install_string( + "pip install foo", prefs + ) + + def test_unknown_executable_ranks_last(self): + prefs = ["uv", "pip"] + rank_unknown = _rank_install_string("obscure-tool install foo", prefs) + assert rank_unknown == len(prefs) + + def test_same_installer_same_rank(self): + prefs = ["pip", "conda"] + r1 = _rank_install_string("pip install foo", prefs) + r2 = _rank_install_string("pip install bar --extra-index-url x", prefs) + assert r1 == r2 + + +# --------------------------------------------------------------------------- +# _preferred_install_methods +# --------------------------------------------------------------------------- + + +class TestPreferredInstallMethods: + def test_returns_list(self): + result = 
_preferred_install_methods() + assert isinstance(result, list) + assert len(result) > 0 + + def test_config_override(self): + with temp_conf(preferred_install_methods=["conda", "pip"]): + result = _preferred_install_methods() + assert result[:2] == ["conda", "pip"] + + def test_empty_config_uses_defaults(self): + with temp_conf(preferred_install_methods=[]): + result = _preferred_install_methods() + assert "pip" in result + assert "uv" in result + + def test_posix_excludes_winget_by_default(self): + with patch.object(tools, "_IS_POSIX", True): + result = _preferred_install_methods() + assert "winget" not in result + + def test_windows_includes_winget_by_default(self): + with patch.object(tools, "_IS_POSIX", False): + result = _preferred_install_methods() + assert "winget" in result + + +# --------------------------------------------------------------------------- +# choose_install_method +# --------------------------------------------------------------------------- + + +class TestChooseInstallMethod: + def test_unknown_tool_returns_none(self): + assert choose_install_method("nonexistent-tool-xyz") is None + + def test_returns_string_for_known_tool_with_viable_method(self): + with installed("pip"): + result = choose_install_method("uv") + assert result is not None + assert isinstance(result, str) + + def test_prefers_configured_installer_when_present(self): + # mlflow has "uv add mlflow" — with uv on PATH and uv preferred, + # that suggestion should be chosen over pip + with temp_conf(preferred_install_methods=["uv", "pip"]): + with installed("uv", "pip"): + result = choose_install_method("mlflow") + assert result is not None + assert result.startswith("uv") + + def test_falls_back_to_pip_when_uv_absent(self): + with temp_conf(preferred_install_methods=["uv", "pip"]): + with installed("pip"): + result = choose_install_method("uv") + assert result is not None + assert result.startswith("pip") + + def test_shell_string_chosen_when_only_curl_available(self): + """When 
only curl is on PATH, a curl|sh one-liner should be chosen.""" + info = ToolInfo( + name="test-shell-tool", + description="Test tool", + install_suggestions=[ + "pip install test-shell-tool", + "curl -sSL https://example.com/install.sh | sh", + ], + ) + with patch.dict(tools.TOOLS, {"test-shell-tool": info}): + with temp_conf(preferred_install_methods=["pip", "curl"]): + with ( + installed("curl"), + patch.object(tools, "_IS_POSIX", True), + ): + result = choose_install_method("test-shell-tool") + assert result is not None + assert "curl" in result + assert "|" in result + + def test_url_never_chosen(self): + info = ToolInfo( + name="url-only-tool", + description="Only has a URL install", + install_suggestions=["https://example.com/install"], + ) + with patch.dict(tools.TOOLS, {"url-only-tool": info}): + result = choose_install_method("url-only-tool") + assert result is None + + def test_preference_order_respected(self): + info = ToolInfo( + name="multi-method-tool", + description="Has several install methods", + install_suggestions=[ + "pip install multi-method-tool", + "conda install -c conda-forge multi-method-tool", + "brew install multi-method-tool", + ], + ) + with patch.dict(tools.TOOLS, {"multi-method-tool": info}): + with temp_conf(preferred_install_methods=["conda", "pip", "brew"]): + with installed("pip", "conda", "brew"): + result = choose_install_method("multi-method-tool") + assert result is not None + assert result.startswith("conda") + + def test_winget_not_chosen_on_posix(self): + info = ToolInfo( + name="win-tool", + description="Windows-only tool", + install_suggestions=["winget install --id=foo.Bar"], + ) + with patch.dict(tools.TOOLS, {"win-tool": info}): + with patch.object(tools, "_IS_POSIX", True): + result = choose_install_method("win-tool") + assert result is None + + +# --------------------------------------------------------------------------- +# install_tool +# --------------------------------------------------------------------------- 
+ + +class TestInstallTool: + def test_raises_for_unknown_tool(self): + with pytest.raises(ValueError, match="Unknown tool"): + install_tool("nonexistent-tool-xyz") + + def test_raises_when_no_viable_method(self): + with ( + nothing_installed(), + patch.object(tools, "_IS_POSIX", False), + ): + with pytest.raises(RuntimeError, match="No viable install method"): + install_tool("uv") + + def test_plain_command_uses_subprocess_call_with_list(self): + """Non-shell install strings are called as a list (no shell=True).""" + with ( + installed("pip"), + patch("subprocess.call", return_value=0) as mock_call, + ): + with temp_conf(preferred_install_methods=["pip"]): + rc = install_tool("uv") + assert rc == 0 + mock_call.assert_called_once() + call_args, call_kwargs = mock_call.call_args + assert isinstance(call_args[0], list) + assert call_kwargs.get("shell") is not True + + def test_shell_string_uses_shell_true(self): + """Shell one-liners are run with shell=True.""" + info = ToolInfo( + name="shell-install-tool", + description="Installed via curl pipe", + install_suggestions=["curl -sSL https://example.com/install.sh | sh"], + ) + with patch.dict(tools.TOOLS, {"shell-install-tool": info}): + with ( + installed("curl"), + patch.object(tools, "_IS_POSIX", True), + patch("subprocess.call", return_value=0) as mock_call, + temp_conf(preferred_install_methods=["curl"]), + ): + rc = install_tool("shell-install-tool") + assert rc == 0 + mock_call.assert_called_once() + call_args, call_kwargs = mock_call.call_args + assert call_kwargs.get("shell") is True + assert isinstance(call_args[0], str) + + def test_returns_exit_code(self): + with ( + installed("pip"), + patch("subprocess.call", return_value=42), + temp_conf(preferred_install_methods=["pip"]), + ): + rc = install_tool("uv") + assert rc == 42 + + def test_non_zero_exit_code_is_propagated(self): + with ( + installed("pip"), + patch("subprocess.call", return_value=1), + 
temp_conf(preferred_install_methods=["pip"]), + ): + rc = install_tool("uv") + assert rc == 1