From bf0d76ca24ea98697fb2813d5c920fccb39a052a Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 13:42:59 -0700 Subject: [PATCH 1/8] Add initial flake8 HTK rules plugin --- .github/workflows/ci.yml | 23 +++++ .gitignore | 13 +++ LICENSE | 22 +++++ README.md | 49 ++++++++++ decisions/2026-04-18-initial-rule-set.md | 25 ++++++ pyproject.toml | 46 ++++++++++ src/flake8_htk_rules/__init__.py | 36 ++++++++ src/flake8_htk_rules/checks.py | 108 ++++++++++++++++++++++ tests/test_plugin.py | 110 +++++++++++++++++++++++ 9 files changed, 432 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 decisions/2026-04-18-initial-rule-set.md create mode 100644 pyproject.toml create mode 100644 src/flake8_htk_rules/__init__.py create mode 100644 src/flake8_htk_rules/checks.py create mode 100644 tests/test_plugin.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..a92e8cb --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - run: python -m pip install --upgrade pip + - run: python -m pip install -e ".[dev]" + - run: python -m unittest discover -s tests -v diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f0e9492 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +.coverage +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ +.tox/ +build/ +dist/ +*.egg-info/ +__pycache__/ +*.py[cod] +.venv/ +venv/ + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5862b21 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2026 Hacktoolkit + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md index 2e8851b..c241383 100644 --- a/README.md +++ b/README.md @@ -1 +1,50 @@ # flake8-htk-rules + +Hacktoolkit Flake8 rules for catching code that should not ship. + +## Installation + +```bash +pip install flake8-htk-rules +``` + +For local development: + +```bash +python -m pip install -e ".[dev]" +python -m unittest discover -s tests -v +``` + +## Rules + +| Code | Description | +| --- | --- | +| `HTK100` | Do not commit `print(...)` calls. | +| `HTK101` | Do not commit debugger traps such as `breakpoint()` or `pdb.set_trace()`. | +| `HTK102` | Do not commit HTK debug helper calls such as `fdb(...)` or `slack_debug(...)`. | + +## Flake8 Configuration + +```ini +[flake8] +select = HTK +``` + +Or combine with existing checks: + +```ini +[flake8] +extend-select = HTK +``` + +## Development + +The plugin is implemented as a single AST checker registered through the +`flake8.extension` entry point. Add new checks in +`src/flake8_htk_rules/checks.py` and cover them in `tests/`. + +Run the test suite without installing the package: + +```bash +PYTHONPATH=src python -m unittest discover -s tests -v +``` diff --git a/decisions/2026-04-18-initial-rule-set.md b/decisions/2026-04-18-initial-rule-set.md new file mode 100644 index 0000000..fff455c --- /dev/null +++ b/decisions/2026-04-18-initial-rule-set.md @@ -0,0 +1,25 @@ +# Initial Rule Set + +## Context + +The repo is intended to be a standalone Flake8 plugin for Hacktoolkit rules. +The original `accounts-django` extraction source was not available in the +local workspace, and GitHub access from this runtime was blocked by DNS. + +## Options + +- Wait for the source repo to become available. +- Create a real plugin package with conservative initial checks and tests. + +## Decision + +Create a standalone package with a modern `pyproject.toml`, `src/` layout, +Flake8 entry point, CI, and an initial HTK rule set focused on code that should +not ship: `print`, debugger traps, and HTK debug helpers. + +## Action Items + +- Add the exact extracted `accounts-django` rules once that source is reachable. +- Push this package to `hacktoolkit/flake8-htk-rules` when GitHub access is + available. + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b41ee81 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["hatchling>=1.21"] +build-backend = "hatchling.build" + +[project] +name = "flake8-htk-rules" +version = "0.1.0" +description = "Hacktoolkit Flake8 rules for catching code that should not ship." +readme = "README.md" +requires-python = ">=3.9" +license = "MIT" +authors = [ + { name = "Hacktoolkit" }, + { name = "Jonathan Tsai" } +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Framework :: Flake8", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Software Development :: Quality Assurance", +] +dependencies = [ + "flake8>=5", +] + +[project.optional-dependencies] +dev = [ + "build>=1.0", + "flake8>=5", + "pytest>=7", +] + +[project.urls] +Homepage = "https://github.com/hacktoolkit/flake8-htk-rules" +Issues = "https://github.com/hacktoolkit/flake8-htk-rules/issues" + +[project.entry-points."flake8.extension"] +HTK = "flake8_htk_rules:Plugin" + +[tool.hatch.build.targets.wheel] +packages = ["src/flake8_htk_rules"] + diff --git a/src/flake8_htk_rules/__init__.py b/src/flake8_htk_rules/__init__.py new file mode 100644 index 0000000..2f69473 --- /dev/null +++ b/src/flake8_htk_rules/__init__.py @@ -0,0 +1,36 @@ +"""Flake8 entry point for Hacktoolkit rules.""" + +from __future__ import annotations + +import ast +from collections.abc import Iterator + +from .checks import HtkVisitor + +__version__ = "0.1.0" + + +class Plugin: + """Flake8 AST plugin entry point.""" + + name = "flake8-htk-rules" + version = __version__ + + def __init__(self, tree: ast.AST, filename: str = "") -> None: + self.tree = tree + self.filename = filename + + def run(self) -> Iterator[tuple[int, int, str, type["Plugin"]]]: + visitor = HtkVisitor() + visitor.visit(self.tree) + for violation in visitor.violations: + yield ( + violation.line, + violation.column, + violation.message, + type(self), + ) + + +__all__ = ["Plugin", "__version__"] + diff --git a/src/flake8_htk_rules/checks.py b/src/flake8_htk_rules/checks.py new file mode 100644 index 0000000..f10aeff --- /dev/null +++ b/src/flake8_htk_rules/checks.py @@ -0,0 +1,108 @@ +"""AST checks for Hacktoolkit Flake8 rules.""" + +from __future__ import annotations + +import ast +from dataclasses import dataclass + + +HTK100 = "HTK100 do not commit print() calls" +HTK101 = "HTK101 do not commit debugger traps" +HTK102 = "HTK102 do not commit HTK debug helper calls" + +DEBUG_MODULES = {"pdb", "ipdb", "pudb", "wdb"} +DEBUG_METHODS = {"set_trace", "post_mortem", "pm", "runcall", "runctx"} +HTK_DEBUG_HELPERS = { + "fdb", + "fdb_json", + "fdebug", + "fdebug_json", + "slack_debug", + "slack_debug_json", +} + + +@dataclass(frozen=True, order=True) +class Violation: + line: int + column: int + message: str + + +class HtkVisitor(ast.NodeVisitor): + """Collect HTK rule violations from a Python AST.""" + + def __init__(self) -> None: + self.violations: list[Violation] = [] + self._debug_aliases: set[str] = set() + self._helper_aliases: set[str] = set() + + def visit_Import(self, node: ast.Import) -> None: + for alias in node.names: + module_name = alias.name.split(".", 1)[0] + if module_name in DEBUG_MODULES: + self._debug_aliases.add(alias.asname or module_name) + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + module_name = (node.module or "").split(".", 1)[0] + if module_name in DEBUG_MODULES: + for alias in node.names: + if alias.name in DEBUG_METHODS or alias.name == "*": + self._debug_aliases.add(alias.asname or alias.name) + if module_name == "htk": + for alias in node.names: + if alias.name in HTK_DEBUG_HELPERS or alias.name == "*": + self._helper_aliases.add(alias.asname or alias.name) + self.generic_visit(node) + + def visit_Call(self, node: ast.Call) -> None: + name = _call_name(node.func) + if name == "print": + self._add(node, HTK100) + elif _is_debugger_call(name, self._debug_aliases): + self._add(node, HTK101) + elif _is_htk_debug_helper(name, self._helper_aliases): + self._add(node, HTK102) + self.generic_visit(node) + + def _add(self, node: ast.AST, message: str) -> None: + self.violations.append( + Violation( + line=getattr(node, "lineno", 1), + column=getattr(node, "col_offset", 0), + message=message, + ) + ) + + +def _call_name(node: ast.AST) -> str: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + parent_name = _call_name(node.value) + if parent_name: + return f"{parent_name}.{node.attr}" + return node.attr + return "" + + +def _is_debugger_call(name: str, aliases: set[str]) -> bool: + if name == "breakpoint": + return True + if name in aliases: + return True + if "." not in name: + return False + module_name, method_name = name.rsplit(".", 1) + return module_name in DEBUG_MODULES | aliases and method_name in DEBUG_METHODS + + +def _is_htk_debug_helper(name: str, aliases: set[str]) -> bool: + if name in HTK_DEBUG_HELPERS | aliases: + return True + if "." not in name: + return False + _, method_name = name.rsplit(".", 1) + return method_name in HTK_DEBUG_HELPERS + diff --git a/tests/test_plugin.py b/tests/test_plugin.py new file mode 100644 index 0000000..cabd47b --- /dev/null +++ b/tests/test_plugin.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import ast +import unittest + +from flake8_htk_rules import Plugin + + +def run_plugin(source: str) -> list[str]: + tree = ast.parse(source) + return [message for _, _, message, _ in Plugin(tree).run()] + + +class PluginTest(unittest.TestCase): + def test_allows_normal_code(self) -> None: + messages = run_plugin( + """ +def greet(name): + return f"hello {name}" +""" + ) + + self.assertEqual(messages, []) + + def test_reports_print_calls(self) -> None: + messages = run_plugin( + """ +def greet(name): + print(name) +""" + ) + + self.assertEqual(messages, ["HTK100 do not commit print() calls"]) + + def test_reports_builtin_breakpoint(self) -> None: + messages = run_plugin( + """ +def handler(): + breakpoint() +""" + ) + + self.assertEqual(messages, ["HTK101 do not commit debugger traps"]) + + def test_reports_debugger_module_calls(self) -> None: + messages = run_plugin( + """ +import pdb +import ipdb as debugger + +def handler(): + pdb.set_trace() + debugger.set_trace() +""" + ) + + self.assertEqual( + messages, + [ + "HTK101 do not commit debugger traps", + "HTK101 do not commit debugger traps", + ], + ) + + def test_reports_imported_debugger_calls(self) -> None: + messages = run_plugin( + """ +from pdb import set_trace +from ipdb import set_trace as trace + +def handler(): + set_trace() + trace() +""" + ) + + self.assertEqual( + messages, + [ + "HTK101 do not commit debugger traps", + "HTK101 do not commit debugger traps", + ], + ) + + def test_reports_htk_debug_helpers(self) -> None: + messages = run_plugin( + """ +import htk +from htk import slack_debug as sd + +def handler(): + htk.fdb("state") + htk.slack_debug_json({"ok": True}) + sd("message") +""" + ) + + self.assertEqual( + messages, + [ + "HTK102 do not commit HTK debug helper calls", + "HTK102 do not commit HTK debug helper calls", + "HTK102 do not commit HTK debug helper calls", + ], + ) + + +if __name__ == "__main__": + unittest.main() + From 656f32750b54d6b333e6d028a72b65edf278fe65 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 14:41:28 -0700 Subject: [PATCH 2/8] Implement extracted SP and datetime rules --- .github/workflows/ci.yml | 2 +- README.md | 27 +++- decisions/2026-04-18-initial-rule-set.md | 34 ++-- pyproject.toml | 2 +- src/flake8_htk_rules/__init__.py | 28 +++- src/flake8_htk_rules/checks.py | 195 ++++++++++++++--------- tests/test_plugin.py | 163 +++++++++++-------- 7 files changed, 288 insertions(+), 163 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a92e8cb..c993ea3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: push: - branches: [main] + branches: [master] pull_request: jobs: diff --git a/README.md b/README.md index c241383..4fb652a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # flake8-htk-rules -Hacktoolkit Flake8 rules for catching code that should not ship. +Hacktoolkit Flake8 rules for structured Python code and datetime clarity. ## Installation @@ -19,24 +19,39 @@ python -m unittest discover -s tests -v | Code | Description | | --- | --- | -| `HTK100` | Do not commit `print(...)` calls. | -| `HTK101` | Do not commit debugger traps such as `breakpoint()` or `pdb.set_trace()`. | -| `HTK102` | Do not commit HTK debug helper calls such as `fdb(...)` or `slack_debug(...)`. | +| `SP100` | Functions in configured files should prefer a single return statement. | +| `SP101` | Return values in configured files should be simple variables, attributes, literals, or bare returns. | +| `DT100` | Use `import datetime` instead of `from datetime import datetime`. | +| `DT101` | Use `import datetime` instead of `from datetime import date`. | +| `DT102` | Use `import datetime` instead of `from datetime import timedelta`. | ## Flake8 Configuration +Enable the structured-programming and datetime rules: + ```ini [flake8] -select = HTK +select = SP,DT +structured-programming-files = + accounts/services.py + accounts/view_helpers.py + accounts/views.py ``` Or combine with existing checks: ```ini [flake8] -extend-select = HTK +extend-select = SP,DT +structured-programming-files = + accounts/services.py + accounts/view_helpers.py + accounts/views.py ``` +The `SP` rules are gated by `structured-programming-files` so teams can roll them +out on a targeted set of modules. The `DT` rules are always active when selected. + ## Development The plugin is implemented as a single AST checker registered through the diff --git a/decisions/2026-04-18-initial-rule-set.md b/decisions/2026-04-18-initial-rule-set.md index fff455c..0d8f26b 100644 --- a/decisions/2026-04-18-initial-rule-set.md +++ b/decisions/2026-04-18-initial-rule-set.md @@ -3,23 +3,35 @@ ## Context The repo is intended to be a standalone Flake8 plugin for Hacktoolkit rules. -The original `accounts-django` extraction source was not available in the -local workspace, and GitHub access from this runtime was blocked by DNS. +Linear task `JON-261` points at the `accounts-django` seed commit +`27b9a788a25117fbdb14158704928564d5370337`, whose custom checker introduced +structured programming rules. + +The Linear comments also call out Jonathan's datetime clarity preference from +his blog post on fully qualified datetime usage. ## Options -- Wait for the source repo to become available. -- Create a real plugin package with conservative initial checks and tests. +- Preserve the first placeholder debug-output checks. +- Extract the `accounts-django` structured programming checker and add the + datetime import rules from the Linear task comments. ## Decision -Create a standalone package with a modern `pyproject.toml`, `src/` layout, -Flake8 entry point, CI, and an initial HTK rule set focused on code that should -not ship: `print`, debugger traps, and HTK debug helpers. +Ship the task-backed rule set instead of the placeholder debug checks: -## Action Items +- `SP100` flags multiple returns in a configured function. +- `SP101` flags complex return expressions in configured files. +- `DT100` flags `from datetime import datetime`. +- `DT101` flags `from datetime import date`. +- `DT102` flags `from datetime import timedelta`. -- Add the exact extracted `accounts-django` rules once that source is reachable. -- Push this package to `hacktoolkit/flake8-htk-rules` when GitHub access is - available. +Structured programming checks are gated by `--structured-programming-files`, +matching the original accounts-django rollout pattern. Datetime checks are +always available when `DT` rules are selected. + +## Action Items +- Add more rule families from Jonathan's writing only after the first extracted + SP/DT set is stable. +- Consider a future ESLint package separately if real JavaScript rules emerge. diff --git a/pyproject.toml b/pyproject.toml index b41ee81..993f0ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "flake8-htk-rules" version = "0.1.0" -description = "Hacktoolkit Flake8 rules for catching code that should not ship." +description = "Hacktoolkit Flake8 rules for structured Python code and datetime clarity." readme = "README.md" requires-python = ">=3.9" license = "MIT" diff --git a/src/flake8_htk_rules/__init__.py b/src/flake8_htk_rules/__init__.py index 2f69473..13c9b4d 100644 --- a/src/flake8_htk_rules/__init__.py +++ b/src/flake8_htk_rules/__init__.py @@ -15,13 +15,38 @@ class Plugin: name = "flake8-htk-rules" version = __version__ + structured_programming_files: tuple[str, ...] = () def __init__(self, tree: ast.AST, filename: str = "") -> None: self.tree = tree self.filename = filename + @classmethod + def add_options(cls, parser) -> None: + parser.add_option( + "--structured-programming-files", + parse_from_config=True, + comma_separated_list=True, + default=[], + help=( + "Comma-separated file globs for structured programming " + "checks." + ), + ) + + @classmethod + def parse_options(cls, options) -> None: + cls.structured_programming_files = tuple( + pattern.strip() + for pattern in getattr(options, "structured_programming_files", []) + if pattern.strip() + ) + def run(self) -> Iterator[tuple[int, int, str, type["Plugin"]]]: - visitor = HtkVisitor() + visitor = HtkVisitor( + filename=self.filename, + structured_programming_files=self.structured_programming_files, + ) visitor.visit(self.tree) for violation in visitor.violations: yield ( @@ -33,4 +58,3 @@ def run(self) -> Iterator[tuple[int, int, str, type["Plugin"]]]: __all__ = ["Plugin", "__version__"] - diff --git a/src/flake8_htk_rules/checks.py b/src/flake8_htk_rules/checks.py index f10aeff..d662ff8 100644 --- a/src/flake8_htk_rules/checks.py +++ b/src/flake8_htk_rules/checks.py @@ -3,22 +3,36 @@ from __future__ import annotations import ast +import os from dataclasses import dataclass - - -HTK100 = "HTK100 do not commit print() calls" -HTK101 = "HTK101 do not commit debugger traps" -HTK102 = "HTK102 do not commit HTK debug helper calls" - -DEBUG_MODULES = {"pdb", "ipdb", "pudb", "wdb"} -DEBUG_METHODS = {"set_trace", "post_mortem", "pm", "runcall", "runctx"} -HTK_DEBUG_HELPERS = { - "fdb", - "fdb_json", - "fdebug", - "fdebug_json", - "slack_debug", - "slack_debug_json", +from fnmatch import fnmatch + + +SP100 = ( + "SP100 function '{name}' has {count} return statements; " + "prefer a single return per function" +) +SP101 = ( + "SP101 return value should be a simple variable, attribute, or literal, " + "not a complex expression" +) +DT100 = ( + "DT100 use 'import datetime' instead of " + "'from datetime import datetime'" +) +DT101 = ( + "DT101 use 'import datetime' instead of " + "'from datetime import date'" +) +DT102 = ( + "DT102 use 'import datetime' instead of " + "'from datetime import timedelta'" +) + +DATETIME_IMPORT_MESSAGES = { + "datetime": DT100, + "date": DT101, + "timedelta": DT102, } @@ -30,79 +44,110 @@ class Violation: class HtkVisitor(ast.NodeVisitor): - """Collect HTK rule violations from a Python AST.""" - - def __init__(self) -> None: + """Collect Hacktoolkit rule violations from a Python AST.""" + + def __init__( + self, + *, + filename: str, + structured_programming_files: tuple[str, ...] = (), + ) -> None: + self.filename = filename + self.structured_programming_files = structured_programming_files self.violations: list[Violation] = [] - self._debug_aliases: set[str] = set() - self._helper_aliases: set[str] = set() - - def visit_Import(self, node: ast.Import) -> None: - for alias in node.names: - module_name = alias.name.split(".", 1)[0] - if module_name in DEBUG_MODULES: - self._debug_aliases.add(alias.asname or module_name) - self.generic_visit(node) def visit_ImportFrom(self, node: ast.ImportFrom) -> None: - module_name = (node.module or "").split(".", 1)[0] - if module_name in DEBUG_MODULES: - for alias in node.names: - if alias.name in DEBUG_METHODS or alias.name == "*": - self._debug_aliases.add(alias.asname or alias.name) - if module_name == "htk": + if node.module == "datetime" and node.level == 0: for alias in node.names: - if alias.name in HTK_DEBUG_HELPERS or alias.name == "*": - self._helper_aliases.add(alias.asname or alias.name) + message = DATETIME_IMPORT_MESSAGES.get(alias.name) + if message is not None: + self._add(alias, node, message) self.generic_visit(node) - def visit_Call(self, node: ast.Call) -> None: - name = _call_name(node.func) - if name == "print": - self._add(node, HTK100) - elif _is_debugger_call(name, self._debug_aliases): - self._add(node, HTK101) - elif _is_htk_debug_helper(name, self._helper_aliases): - self._add(node, HTK102) + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._check_function(node) self.generic_visit(node) - def _add(self, node: ast.AST, message: str) -> None: + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._check_function(node) + self.generic_visit(node) + + def _check_function( + self, + node: ast.FunctionDef | ast.AsyncFunctionDef, + ) -> None: + if not self._should_run_structured_programming_checks(): + return + + returns = _collect_returns(node) + if len(returns) > 1: + self._add( + node, + node, + SP100.format(name=node.name, count=len(returns)), + ) + + for return_node in returns: + if not _is_simple_return_value(return_node.value): + self._add(return_node, return_node, SP101) + + def _should_run_structured_programming_checks(self) -> bool: + if not self.structured_programming_files: + return False + + relative_path = os.path.relpath(self.filename, os.getcwd()).replace( + os.sep, + "/", + ) + return any( + fnmatch(relative_path, pattern) + for pattern in self.structured_programming_files + ) + + def _add(self, node: ast.AST, fallback: ast.AST, message: str) -> None: self.violations.append( Violation( - line=getattr(node, "lineno", 1), - column=getattr(node, "col_offset", 0), + line=getattr(node, "lineno", getattr(fallback, "lineno", 1)), + column=getattr( + node, + "col_offset", + getattr(fallback, "col_offset", 0), + ), message=message, ) ) -def _call_name(node: ast.AST) -> str: - if isinstance(node, ast.Name): - return node.id - if isinstance(node, ast.Attribute): - parent_name = _call_name(node.value) - if parent_name: - return f"{parent_name}.{node.attr}" - return node.attr - return "" - - -def _is_debugger_call(name: str, aliases: set[str]) -> bool: - if name == "breakpoint": - return True - if name in aliases: - return True - if "." not in name: - return False - module_name, method_name = name.rsplit(".", 1) - return module_name in DEBUG_MODULES | aliases and method_name in DEBUG_METHODS - - -def _is_htk_debug_helper(name: str, aliases: set[str]) -> bool: - if name in HTK_DEBUG_HELPERS | aliases: - return True - if "." not in name: - return False - _, method_name = name.rsplit(".", 1) - return method_name in HTK_DEBUG_HELPERS +def _collect_returns( + function_node: ast.FunctionDef | ast.AsyncFunctionDef, +) -> list[ast.Return]: + collector = _ReturnCollector(function_node) + collector.visit(function_node) + return collector.returns + + +def _is_simple_return_value(value: ast.expr | None) -> bool: + return value is None or isinstance( + value, + (ast.Name, ast.Attribute, ast.Constant), + ) + + +class _ReturnCollector(ast.NodeVisitor): + def __init__(self, root: ast.FunctionDef | ast.AsyncFunctionDef) -> None: + self.root = root + self.returns: list[ast.Return] = [] + + def visit_Return(self, node: ast.Return) -> None: + self.returns.append(node) + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + if node is self.root: + self.generic_visit(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + if node is self.root: + self.generic_visit(node) + def visit_Lambda(self, node: ast.Lambda) -> None: + return diff --git a/tests/test_plugin.py b/tests/test_plugin.py index cabd47b..3010432 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -2,109 +2,138 @@ import ast import unittest +from types import SimpleNamespace from flake8_htk_rules import Plugin - -def run_plugin(source: str) -> list[str]: +SP100_SAMPLE = ( + "SP100 function 'sample' has 2 return statements; " + "prefer a single return per function" +) +SP101 = ( + "SP101 return value should be a simple variable, attribute, or literal, " + "not a complex expression" +) +DT100 = ( + "DT100 use 'import datetime' instead of " + "'from datetime import datetime'" +) +DT101 = ( + "DT101 use 'import datetime' instead of " + "'from datetime import date'" +) +DT102 = ( + "DT102 use 'import datetime' instead of " + "'from datetime import timedelta'" +) + + +def run_plugin( + source: str, + *, + filename: str = "accounts/services.py", + structured_programming_files: list[str] | None = None, +) -> list[str]: + Plugin.parse_options( + SimpleNamespace( + structured_programming_files=structured_programming_files or [], + ) + ) tree = ast.parse(source) - return [message for _, _, message, _ in Plugin(tree).run()] + return [message for _, _, message, _ in Plugin(tree, filename).run()] class PluginTest(unittest.TestCase): - def test_allows_normal_code(self) -> None: + def test_flags_multiple_returns_for_configured_file(self) -> None: messages = run_plugin( """ -def greet(name): - return f"hello {name}" -""" +def sample(value): + if value: + return value + return None +""", + structured_programming_files=["accounts/services.py"], ) - self.assertEqual(messages, []) + self.assertEqual(messages, [SP100_SAMPLE]) - def test_reports_print_calls(self) -> None: + def test_flags_complex_return_expression_for_configured_file(self) -> None: messages = run_plugin( """ -def greet(name): - print(name) -""" +def sample(value): + return value + 1 +""", + structured_programming_files=["accounts/services.py"], ) - self.assertEqual(messages, ["HTK100 do not commit print() calls"]) + self.assertEqual(messages, [SP101]) - def test_reports_builtin_breakpoint(self) -> None: + def test_allows_single_simple_return_for_configured_file(self) -> None: messages = run_plugin( """ -def handler(): - breakpoint() -""" +def sample(value): + result = value + 1 + return result +""", + structured_programming_files=["accounts/services.py"], ) - self.assertEqual(messages, ["HTK101 do not commit debugger traps"]) + self.assertEqual(messages, []) - def test_reports_debugger_module_calls(self) -> None: + def test_skips_structured_programming_for_unconfigured_file(self) -> None: messages = run_plugin( """ -import pdb -import ipdb as debugger - -def handler(): - pdb.set_trace() - debugger.set_trace() -""" +def sample(value): + if value: + return value + return value + 1 +""", + filename="accounts/forms.py", + structured_programming_files=["accounts/services.py"], ) - self.assertEqual( - messages, - [ - "HTK101 do not commit debugger traps", - "HTK101 do not commit debugger traps", - ], - ) + self.assertEqual(messages, []) - def test_reports_imported_debugger_calls(self) -> None: + def test_ignores_returns_inside_nested_functions(self) -> None: messages = run_plugin( """ -from pdb import set_trace -from ipdb import set_trace as trace - -def handler(): - set_trace() - trace() -""" +def outer(value): + def inner(): + return value + return value +""", + structured_programming_files=["accounts/services.py"], ) - self.assertEqual( - messages, - [ - "HTK101 do not commit debugger traps", - "HTK101 do not commit debugger traps", - ], - ) + self.assertEqual(messages, []) + + def test_flags_datetime_datetime_import(self) -> None: + messages = run_plugin("from datetime import datetime\n") - def test_reports_htk_debug_helpers(self) -> None: + self.assertEqual(messages, [DT100]) + + def test_flags_datetime_date_import(self) -> None: + messages = run_plugin("from datetime import date\n") + + self.assertEqual(messages, [DT101]) + + def test_flags_datetime_timedelta_import(self) -> None: + messages = run_plugin("from datetime import timedelta\n") + + self.assertEqual(messages, [DT102]) + + def test_allows_fully_qualified_datetime_import(self) -> None: + messages = run_plugin("import datetime\n") + + self.assertEqual(messages, []) + + def test_flags_multiple_datetime_imports(self) -> None: messages = run_plugin( - """ -import htk -from htk import slack_debug as sd - -def handler(): - htk.fdb("state") - htk.slack_debug_json({"ok": True}) - sd("message") -""" + "from datetime import date, datetime, timedelta\n" ) - self.assertEqual( - messages, - [ - "HTK102 do not commit HTK debug helper calls", - "HTK102 do not commit HTK debug helper calls", - "HTK102 do not commit HTK debug helper calls", - ], - ) + self.assertEqual(messages, [DT101, DT100, DT102]) if __name__ == "__main__": unittest.main() - From f8b657b5c62600e67f47c87ada17aa05cc43095a Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 14:58:19 -0700 Subject: [PATCH 3/8] Add debugger import and call checks --- README.md | 9 ++- decisions/2026-04-18-initial-rule-set.md | 4 +- src/flake8_htk_rules/checks.py | 70 ++++++++++++++++++++++++ tests/test_plugin.py | 52 ++++++++++++++++++ 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4fb652a..b5cbae9 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ python -m unittest discover -s tests -v | `DT100` | Use `import datetime` instead of `from datetime import datetime`. | | `DT101` | Use `import datetime` instead of `from datetime import date`. | | `DT102` | Use `import datetime` instead of `from datetime import timedelta`. | +| `DB100` | Do not commit debugger imports such as `import pdb`. | +| `DB101` | Do not commit debugger calls such as `breakpoint()` or `pdb.set_trace()`. | ## Flake8 Configuration @@ -31,7 +33,7 @@ Enable the structured-programming and datetime rules: ```ini [flake8] -select = SP,DT +select = SP,DT,DB structured-programming-files = accounts/services.py accounts/view_helpers.py @@ -42,7 +44,7 @@ Or combine with existing checks: ```ini [flake8] -extend-select = SP,DT +extend-select = SP,DT,DB,DB structured-programming-files = accounts/services.py accounts/view_helpers.py @@ -50,7 +52,8 @@ structured-programming-files = ``` The `SP` rules are gated by `structured-programming-files` so teams can roll them -out on a targeted set of modules. The `DT` rules are always active when selected. +out on a targeted set of modules. The `DT` and `DB` rules are always active when +selected. ## Development diff --git a/decisions/2026-04-18-initial-rule-set.md b/decisions/2026-04-18-initial-rule-set.md index 0d8f26b..c1d253a 100644 --- a/decisions/2026-04-18-initial-rule-set.md +++ b/decisions/2026-04-18-initial-rule-set.md @@ -25,10 +25,12 @@ Ship the task-backed rule set instead of the placeholder debug checks: - `DT100` flags `from datetime import datetime`. - `DT101` flags `from datetime import date`. - `DT102` flags `from datetime import timedelta`. +- `DB100` flags debugger imports. +- `DB101` flags debugger calls. Structured programming checks are gated by `--structured-programming-files`, matching the original accounts-django rollout pattern. Datetime checks are -always available when `DT` rules are selected. +always available when their `DT` or `DB` rules are selected. ## Action Items diff --git a/src/flake8_htk_rules/checks.py b/src/flake8_htk_rules/checks.py index d662ff8..d22530f 100644 --- a/src/flake8_htk_rules/checks.py +++ b/src/flake8_htk_rules/checks.py @@ -28,12 +28,22 @@ "DT102 use 'import datetime' instead of " "'from datetime import timedelta'" ) +DB100 = "DB100 do not commit debugger imports" +DB101 = "DB101 do not commit debugger calls" DATETIME_IMPORT_MESSAGES = { "datetime": DT100, "date": DT101, "timedelta": DT102, } +DEBUGGER_MODULES = {"pdb", "ipdb", "pudb", "wdb"} +DEBUGGER_METHODS = { + "set_trace", + "post_mortem", + "pm", + "runcall", + "runctx", +} @dataclass(frozen=True, order=True) @@ -55,6 +65,16 @@ def __init__( self.filename = filename self.structured_programming_files = structured_programming_files self.violations: list[Violation] = [] + self._debugger_module_aliases: set[str] = set() + self._debugger_call_aliases: set[str] = set() + + def visit_Import(self, node: ast.Import) -> None: + for alias in node.names: + root_module = alias.name.split(".", 1)[0] + if root_module in DEBUGGER_MODULES: + self._debugger_module_aliases.add(alias.asname or root_module) + self._add(alias, node, DB100) + self.generic_visit(node) def visit_ImportFrom(self, node: ast.ImportFrom) -> None: if node.module == "datetime" and node.level == 0: @@ -62,6 +82,26 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: message = DATETIME_IMPORT_MESSAGES.get(alias.name) if message is not None: self._add(alias, node, message) + + root_module = (node.module or "").split(".", 1)[0] + if root_module in DEBUGGER_MODULES: + for alias in node.names: + self._add(alias, node, DB100) + if alias.name == "*": + self._debugger_call_aliases.update(DEBUGGER_METHODS) + elif alias.name in DEBUGGER_METHODS: + self._debugger_call_aliases.add(alias.asname or alias.name) + + self.generic_visit(node) + + def visit_Call(self, node: ast.Call) -> None: + name = _call_name(node.func) + if _is_debugger_call( + name, + module_aliases=self._debugger_module_aliases, + call_aliases=self._debugger_call_aliases, + ): + self._add(node, node, DB101) self.generic_visit(node) def visit_FunctionDef(self, node: ast.FunctionDef) -> None: @@ -133,6 +173,36 @@ def _is_simple_return_value(value: ast.expr | None) -> bool: ) +def _call_name(node: ast.AST) -> str: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + parent_name = _call_name(node.value) + if parent_name: + return f"{parent_name}.{node.attr}" + return node.attr + return "" + + +def _is_debugger_call( + name: str, + *, + module_aliases: set[str], + call_aliases: set[str], +) -> bool: + if name == "breakpoint" or name in call_aliases: + return True + if "." not in name: + return False + + module_name, method_name = name.rsplit(".", 1) + root_module = module_name.split(".", 1)[0] + return ( + method_name in DEBUGGER_METHODS + and root_module in DEBUGGER_MODULES | module_aliases + ) + + class _ReturnCollector(ast.NodeVisitor): def __init__(self, root: ast.FunctionDef | ast.AsyncFunctionDef) -> None: self.root = root diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 3010432..01d54eb 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -26,6 +26,8 @@ "DT102 use 'import datetime' instead of " "'from datetime import timedelta'" ) +DB100 = "DB100 do not commit debugger imports" +DB101 = "DB101 do not commit debugger calls" def run_plugin( @@ -134,6 +136,56 @@ def test_flags_multiple_datetime_imports(self) -> None: self.assertEqual(messages, [DT101, DT100, DT102]) + def test_flags_builtin_breakpoint(self) -> None: + messages = run_plugin( + """ +def handler(): + breakpoint() +""" + ) + + self.assertEqual(messages, [DB101]) + + def test_flags_debugger_module_import_and_call(self) -> None: + messages = run_plugin( + """ +import pdb +import ipdb as debugger + +def handler(): + pdb.set_trace() + debugger.set_trace() +""" + ) + + self.assertEqual(messages, [DB100, DB100, DB101, DB101]) + + def test_flags_imported_debugger_call_alias(self) -> None: + messages = run_plugin( + """ +from pdb import set_trace +from ipdb import set_trace as trace + +def handler(): + set_trace() + trace() +""" + ) + + self.assertEqual(messages, [DB100, DB100, DB101, DB101]) + + def test_flags_debugger_wildcard_imported_call(self) -> None: + messages = run_plugin( + """ +from pdb import * + +def handler(): + set_trace() +""" + ) + + self.assertEqual(messages, [DB100, DB101]) + if __name__ == "__main__": unittest.main() From d2cc0e9f9d554fc9751cbeec15d6daff0d37a335 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 15:00:41 -0700 Subject: [PATCH 4/8] Warn on vague get_ function names --- README.md | 11 ++++---- decisions/2026-04-18-initial-rule-set.md | 3 +- src/flake8_htk_rules/checks.py | 8 ++++++ tests/test_plugin.py | 35 ++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b5cbae9..2d33835 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,15 @@ python -m unittest discover -s tests -v | `DT102` | Use `import datetime` instead of `from datetime import timedelta`. | | `DB100` | Do not commit debugger imports such as `import pdb`. | | `DB101` | Do not commit debugger calls such as `breakpoint()` or `pdb.set_trace()`. | +| `NM100` | Avoid the vague `get_` function prefix; choose a more precise verb. | ## Flake8 Configuration -Enable the structured-programming and datetime rules: +Enable the rules: ```ini [flake8] -select = SP,DT,DB +select = SP,DT,DB,NM structured-programming-files = accounts/services.py accounts/view_helpers.py @@ -44,7 +45,7 @@ Or combine with existing checks: ```ini [flake8] -extend-select = SP,DT,DB,DB +extend-select = SP,DT,DB,NM structured-programming-files = accounts/services.py accounts/view_helpers.py @@ -52,8 +53,8 @@ structured-programming-files = ``` The `SP` rules are gated by `structured-programming-files` so teams can roll them -out on a targeted set of modules. The `DT` and `DB` rules are always active when -selected. +out on a targeted set of modules. The `DT`, `DB`, and `NM` rules are always +active when selected. ## Development diff --git a/decisions/2026-04-18-initial-rule-set.md b/decisions/2026-04-18-initial-rule-set.md index c1d253a..35894b4 100644 --- a/decisions/2026-04-18-initial-rule-set.md +++ b/decisions/2026-04-18-initial-rule-set.md @@ -27,10 +27,11 @@ Ship the task-backed rule set instead of the placeholder debug checks: - `DT102` flags `from datetime import timedelta`. - `DB100` flags debugger imports. - `DB101` flags debugger calls. +- `NM100` flags functions and methods named with the vague `get_` prefix. Structured programming checks are gated by `--structured-programming-files`, matching the original accounts-django rollout pattern. Datetime checks are -always available when their `DT` or `DB` rules are selected. +always available when their `DT`, `DB`, or `NM` rules are selected. ## Action Items diff --git a/src/flake8_htk_rules/checks.py b/src/flake8_htk_rules/checks.py index d22530f..2196aa5 100644 --- a/src/flake8_htk_rules/checks.py +++ b/src/flake8_htk_rules/checks.py @@ -30,6 +30,11 @@ ) DB100 = "DB100 do not commit debugger imports" DB101 = "DB101 do not commit debugger calls" +NM100 = ( + "NM100 avoid get_ function prefix; choose a more precise verb " + "such as build_, calculate_, extract_, fetch_, look_up_, " + "retrieve_, format_, or transform_" +) DATETIME_IMPORT_MESSAGES = { "datetime": DT100, @@ -116,6 +121,9 @@ def _check_function( self, node: ast.FunctionDef | ast.AsyncFunctionDef, ) -> None: + if node.name.startswith("get_"): + self._add(node, node, NM100) + if not self._should_run_structured_programming_checks(): return diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 01d54eb..d4d8197 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -28,6 +28,11 @@ ) DB100 = "DB100 do not commit debugger imports" DB101 = "DB101 do not commit debugger calls" +NM100 = ( + "NM100 avoid get_ function prefix; choose a more precise verb " + "such as build_, calculate_, extract_, fetch_, look_up_, " + "retrieve_, format_, or transform_" +) def run_plugin( @@ -186,6 +191,36 @@ def handler(): self.assertEqual(messages, [DB100, DB101]) + def test_flags_get_prefixed_function_name(self) -> None: + messages = run_plugin( + """ +def get_total(items): + return sum(items) +""" + ) + + self.assertEqual(messages, [NM100]) + + def test_flags_get_prefixed_async_function_name(self) -> None: + messages = run_plugin( + """ +async def get_story_details(story_id): + return story_id +""" + ) + + self.assertEqual(messages, [NM100]) + + def test_allows_non_get_prefixed_function_name(self) -> None: + messages = run_plugin( + """ +def fetch_story_details(story_id): + return story_id +""" + ) + + self.assertEqual(messages, []) + if __name__ == "__main__": unittest.main() From 70615e5bc6722e910e7f35e3d9d56aa51f0c3e02 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 15:16:44 -0700 Subject: [PATCH 5/8] Refresh rule set docs --- README.md | 7 ++++--- decisions/2026-04-18-initial-rule-set.md | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 2d33835..6a38620 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # flake8-htk-rules -Hacktoolkit Flake8 rules for structured Python code and datetime clarity. +Hacktoolkit Flake8 rules for structured Python code, datetime clarity, +debugger prevention, and naming precision. ## Installation @@ -24,9 +25,9 @@ python -m unittest discover -s tests -v | `DT100` | Use `import datetime` instead of `from datetime import datetime`. | | `DT101` | Use `import datetime` instead of `from datetime import date`. | | `DT102` | Use `import datetime` instead of `from datetime import timedelta`. | -| `DB100` | Do not commit debugger imports such as `import pdb`. | +| `DB100` | Do not commit debugger imports such as `import pdb` or `from pdb import set_trace`. | | `DB101` | Do not commit debugger calls such as `breakpoint()` or `pdb.set_trace()`. | -| `NM100` | Avoid the vague `get_` function prefix; choose a more precise verb. | +| `NM100` | Avoid the vague `get_` function or method prefix; choose a more precise verb. | ## Flake8 Configuration diff --git a/decisions/2026-04-18-initial-rule-set.md b/decisions/2026-04-18-initial-rule-set.md index 35894b4..96776d0 100644 --- a/decisions/2026-04-18-initial-rule-set.md +++ b/decisions/2026-04-18-initial-rule-set.md @@ -8,17 +8,20 @@ Linear task `JON-261` points at the `accounts-django` seed commit structured programming rules. The Linear comments also call out Jonathan's datetime clarity preference from -his blog post on fully qualified datetime usage. +his blog post on fully qualified datetime usage. Follow-up Slack feedback added +explicit debugger prevention and the `get_` naming rule from Jonathan's writing +on vague function prefixes. ## Options -- Preserve the first placeholder debug-output checks. -- Extract the `accounts-django` structured programming checker and add the - datetime import rules from the Linear task comments. +- Keep the first placeholder debug-output-only rule set. +- Extract the `accounts-django` structured programming checker, then add the + task/comment-backed datetime, debugger, and naming rules. ## Decision -Ship the task-backed rule set instead of the placeholder debug checks: +Ship the task-backed rule set and keep debugger prevention as a first-class rule +family: - `SP100` flags multiple returns in a configured function. - `SP101` flags complex return expressions in configured files. @@ -30,11 +33,12 @@ Ship the task-backed rule set instead of the placeholder debug checks: - `NM100` flags functions and methods named with the vague `get_` prefix. Structured programming checks are gated by `--structured-programming-files`, -matching the original accounts-django rollout pattern. Datetime checks are -always available when their `DT`, `DB`, or `NM` rules are selected. +matching the original accounts-django rollout pattern. Datetime, debugger, and +naming checks are always available when their `DT`, `DB`, or `NM` rules are +selected. ## Action Items -- Add more rule families from Jonathan's writing only after the first extracted - SP/DT set is stable. +- Add more rule families from Jonathan's writing only after the current SP, DT, + DB, and NM rules are stable. - Consider a future ESLint package separately if real JavaScript rules emerge. From 38b31f48b96ac0d63292f37673a2f47b7235a834 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 20:03:31 -0700 Subject: [PATCH 6/8] Remove public decision log --- decisions/2026-04-18-initial-rule-set.md | 44 ------------------------ 1 file changed, 44 deletions(-) delete mode 100644 decisions/2026-04-18-initial-rule-set.md diff --git a/decisions/2026-04-18-initial-rule-set.md b/decisions/2026-04-18-initial-rule-set.md deleted file mode 100644 index 96776d0..0000000 --- a/decisions/2026-04-18-initial-rule-set.md +++ /dev/null @@ -1,44 +0,0 @@ -# Initial Rule Set - -## Context - -The repo is intended to be a standalone Flake8 plugin for Hacktoolkit rules. -Linear task `JON-261` points at the `accounts-django` seed commit -`27b9a788a25117fbdb14158704928564d5370337`, whose custom checker introduced -structured programming rules. - -The Linear comments also call out Jonathan's datetime clarity preference from -his blog post on fully qualified datetime usage. Follow-up Slack feedback added -explicit debugger prevention and the `get_` naming rule from Jonathan's writing -on vague function prefixes. - -## Options - -- Keep the first placeholder debug-output-only rule set. -- Extract the `accounts-django` structured programming checker, then add the - task/comment-backed datetime, debugger, and naming rules. - -## Decision - -Ship the task-backed rule set and keep debugger prevention as a first-class rule -family: - -- `SP100` flags multiple returns in a configured function. -- `SP101` flags complex return expressions in configured files. -- `DT100` flags `from datetime import datetime`. -- `DT101` flags `from datetime import date`. -- `DT102` flags `from datetime import timedelta`. -- `DB100` flags debugger imports. -- `DB101` flags debugger calls. -- `NM100` flags functions and methods named with the vague `get_` prefix. - -Structured programming checks are gated by `--structured-programming-files`, -matching the original accounts-django rollout pattern. Datetime, debugger, and -naming checks are always available when their `DT`, `DB`, or `NM` rules are -selected. - -## Action Items - -- Add more rule families from Jonathan's writing only after the current SP, DT, - DB, and NM rules are stable. -- Consider a future ESLint package separately if real JavaScript rules emerge. From da196eeded97f2639868e18f1ea04f207bc4f9d9 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 20:34:19 -0700 Subject: [PATCH 7/8] Split checks by rule family --- README.md | 6 +- src/flake8_htk_rules/checks.py | 231 ---------------------- src/flake8_htk_rules/checks/__init__.py | 90 +++++++++ src/flake8_htk_rules/checks/datetime.py | 37 ++++ src/flake8_htk_rules/checks/debugger.py | 91 +++++++++ src/flake8_htk_rules/checks/naming.py | 20 ++ src/flake8_htk_rules/checks/structured.py | 84 ++++++++ src/flake8_htk_rules/checks/types.py | 12 ++ 8 files changed, 337 insertions(+), 234 deletions(-) delete mode 100644 src/flake8_htk_rules/checks.py create mode 100644 src/flake8_htk_rules/checks/__init__.py create mode 100644 src/flake8_htk_rules/checks/datetime.py create mode 100644 src/flake8_htk_rules/checks/debugger.py create mode 100644 src/flake8_htk_rules/checks/naming.py create mode 100644 src/flake8_htk_rules/checks/structured.py create mode 100644 src/flake8_htk_rules/checks/types.py diff --git a/README.md b/README.md index 6a38620..42625a1 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,9 @@ active when selected. ## Development -The plugin is implemented as a single AST checker registered through the -`flake8.extension` entry point. Add new checks in -`src/flake8_htk_rules/checks.py` and cover them in `tests/`. +The plugin uses a single Flake8 entry point and delegates rule logic to +family modules under `src/flake8_htk_rules/checks/`. Add new rule families +there and cover them in `tests/`. Run the test suite without installing the package: diff --git a/src/flake8_htk_rules/checks.py b/src/flake8_htk_rules/checks.py deleted file mode 100644 index 2196aa5..0000000 --- a/src/flake8_htk_rules/checks.py +++ /dev/null @@ -1,231 +0,0 @@ -"""AST checks for Hacktoolkit Flake8 rules.""" - -from __future__ import annotations - -import ast -import os -from dataclasses import dataclass -from fnmatch import fnmatch - - -SP100 = ( - "SP100 function '{name}' has {count} return statements; " - "prefer a single return per function" -) -SP101 = ( - "SP101 return value should be a simple variable, attribute, or literal, " - "not a complex expression" -) -DT100 = ( - "DT100 use 'import datetime' instead of " - "'from datetime import datetime'" -) -DT101 = ( - "DT101 use 'import datetime' instead of " - "'from datetime import date'" -) -DT102 = ( - "DT102 use 'import datetime' instead of " - "'from datetime import timedelta'" -) -DB100 = "DB100 do not commit debugger imports" -DB101 = "DB101 do not commit debugger calls" -NM100 = ( - "NM100 avoid get_ function prefix; choose a more precise verb " - "such as build_, calculate_, extract_, fetch_, look_up_, " - "retrieve_, format_, or transform_" -) - -DATETIME_IMPORT_MESSAGES = { - "datetime": DT100, - "date": DT101, - "timedelta": DT102, -} -DEBUGGER_MODULES = {"pdb", "ipdb", "pudb", "wdb"} -DEBUGGER_METHODS = { - "set_trace", - "post_mortem", - "pm", - "runcall", - "runctx", -} - - -@dataclass(frozen=True, order=True) -class Violation: - line: int - column: int - message: str - - -class HtkVisitor(ast.NodeVisitor): - """Collect Hacktoolkit rule violations from a Python AST.""" - - def __init__( - self, - *, - filename: str, - structured_programming_files: tuple[str, ...] = (), - ) -> None: - self.filename = filename - self.structured_programming_files = structured_programming_files - self.violations: list[Violation] = [] - self._debugger_module_aliases: set[str] = set() - self._debugger_call_aliases: set[str] = set() - - def visit_Import(self, node: ast.Import) -> None: - for alias in node.names: - root_module = alias.name.split(".", 1)[0] - if root_module in DEBUGGER_MODULES: - self._debugger_module_aliases.add(alias.asname or root_module) - self._add(alias, node, DB100) - self.generic_visit(node) - - def visit_ImportFrom(self, node: ast.ImportFrom) -> None: - if node.module == "datetime" and node.level == 0: - for alias in node.names: - message = DATETIME_IMPORT_MESSAGES.get(alias.name) - if message is not None: - self._add(alias, node, message) - - root_module = (node.module or "").split(".", 1)[0] - if root_module in DEBUGGER_MODULES: - for alias in node.names: - self._add(alias, node, DB100) - if alias.name == "*": - self._debugger_call_aliases.update(DEBUGGER_METHODS) - elif alias.name in DEBUGGER_METHODS: - self._debugger_call_aliases.add(alias.asname or alias.name) - - self.generic_visit(node) - - def visit_Call(self, node: ast.Call) -> None: - name = _call_name(node.func) - if _is_debugger_call( - name, - module_aliases=self._debugger_module_aliases, - call_aliases=self._debugger_call_aliases, - ): - self._add(node, node, DB101) - self.generic_visit(node) - - def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - self._check_function(node) - self.generic_visit(node) - - def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: - self._check_function(node) - self.generic_visit(node) - - def _check_function( - self, - node: ast.FunctionDef | ast.AsyncFunctionDef, - ) -> None: - if node.name.startswith("get_"): - self._add(node, node, NM100) - - if not self._should_run_structured_programming_checks(): - return - - returns = _collect_returns(node) - if len(returns) > 1: - self._add( - node, - node, - SP100.format(name=node.name, count=len(returns)), - ) - - for return_node in returns: - if not _is_simple_return_value(return_node.value): - self._add(return_node, return_node, SP101) - - def _should_run_structured_programming_checks(self) -> bool: - if not self.structured_programming_files: - return False - - relative_path = os.path.relpath(self.filename, os.getcwd()).replace( - os.sep, - "/", - ) - return any( - fnmatch(relative_path, pattern) - for pattern in self.structured_programming_files - ) - - def _add(self, node: ast.AST, fallback: ast.AST, message: str) -> None: - self.violations.append( - Violation( - line=getattr(node, "lineno", getattr(fallback, "lineno", 1)), - column=getattr( - node, - "col_offset", - getattr(fallback, "col_offset", 0), - ), - message=message, - ) - ) - - -def _collect_returns( - function_node: ast.FunctionDef | ast.AsyncFunctionDef, -) -> list[ast.Return]: - collector = _ReturnCollector(function_node) - collector.visit(function_node) - return collector.returns - - -def _is_simple_return_value(value: ast.expr | None) -> bool: - return value is None or isinstance( - value, - (ast.Name, ast.Attribute, ast.Constant), - ) - - -def _call_name(node: ast.AST) -> str: - if isinstance(node, ast.Name): - return node.id - if isinstance(node, ast.Attribute): - parent_name = _call_name(node.value) - if parent_name: - return f"{parent_name}.{node.attr}" - return node.attr - return "" - - -def _is_debugger_call( - name: str, - *, - module_aliases: set[str], - call_aliases: set[str], -) -> bool: - if name == "breakpoint" or name in call_aliases: - return True - if "." not in name: - return False - - module_name, method_name = name.rsplit(".", 1) - root_module = module_name.split(".", 1)[0] - return ( - method_name in DEBUGGER_METHODS - and root_module in DEBUGGER_MODULES | module_aliases - ) - - -class _ReturnCollector(ast.NodeVisitor): - def __init__(self, root: ast.FunctionDef | ast.AsyncFunctionDef) -> None: - self.root = root - self.returns: list[ast.Return] = [] - - def visit_Return(self, node: ast.Return) -> None: - self.returns.append(node) - - def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - if node is self.root: - self.generic_visit(node) - - def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: - if node is self.root: - self.generic_visit(node) - - def visit_Lambda(self, node: ast.Lambda) -> None: - return diff --git a/src/flake8_htk_rules/checks/__init__.py b/src/flake8_htk_rules/checks/__init__.py new file mode 100644 index 0000000..6951d25 --- /dev/null +++ b/src/flake8_htk_rules/checks/__init__.py @@ -0,0 +1,90 @@ +"""Check orchestration for Hacktoolkit Flake8 rules.""" + +from __future__ import annotations + +import ast + +from . import datetime as datetime_checks +from . import debugger, naming, structured +from .types import Violation + + +class HtkVisitor(ast.NodeVisitor): + """Collect Hacktoolkit rule violations from a Python AST.""" + + def __init__( + self, + *, + filename: str, + structured_programming_files: tuple[str, ...] = (), + ) -> None: + self.filename = filename + self.structured_programming_files = structured_programming_files + self.violations: list[Violation] = [] + self._debugger_state = debugger.DebuggerState() + + def visit_Import(self, node: ast.Import) -> None: + for violation_node, message in debugger.check_import( + node, + self._debugger_state, + ): + self._add(violation_node, node, message) + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + for violation_node, message in datetime_checks.check_import_from(node): + self._add(violation_node, node, message) + for violation_node, message in debugger.check_import_from( + node, + self._debugger_state, + ): + self._add(violation_node, node, message) + self.generic_visit(node) + + def visit_Call(self, node: ast.Call) -> None: + for violation_node, message in debugger.check_call( + node, + self._debugger_state, + ): + self._add(violation_node, node, message) + self.generic_visit(node) + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + self._check_function(node) + self.generic_visit(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + self._check_function(node) + self.generic_visit(node) + + def _check_function( + self, + node: ast.FunctionDef | ast.AsyncFunctionDef, + ) -> None: + for violation_node, message in naming.check_function(node): + self._add(violation_node, node, message) + + if not structured.should_check_file( + self.filename, + self.structured_programming_files, + ): + return + + for violation_node, message in structured.check_function(node): + self._add(violation_node, node, message) + + def _add(self, node: ast.AST, fallback: ast.AST, message: str) -> None: + self.violations.append( + Violation( + line=getattr(node, "lineno", getattr(fallback, "lineno", 1)), + column=getattr( + node, + "col_offset", + getattr(fallback, "col_offset", 0), + ), + message=message, + ) + ) + + +__all__ = ["HtkVisitor", "Violation"] diff --git a/src/flake8_htk_rules/checks/datetime.py b/src/flake8_htk_rules/checks/datetime.py new file mode 100644 index 0000000..0dce7c0 --- /dev/null +++ b/src/flake8_htk_rules/checks/datetime.py @@ -0,0 +1,37 @@ +"""Datetime clarity checks.""" + +from __future__ import annotations + +import ast + + +DT100 = ( + "DT100 use 'import datetime' instead of " + "'from datetime import datetime'" +) +DT101 = ( + "DT101 use 'import datetime' instead of " + "'from datetime import date'" +) +DT102 = ( + "DT102 use 'import datetime' instead of " + "'from datetime import timedelta'" +) + +DATETIME_IMPORT_MESSAGES = { + "datetime": DT100, + "date": DT101, + "timedelta": DT102, +} + + +def check_import_from(node: ast.ImportFrom) -> list[tuple[ast.AST, str]]: + if node.module != "datetime" or node.level != 0: + return [] + + violations = [] + for alias in node.names: + message = DATETIME_IMPORT_MESSAGES.get(alias.name) + if message is not None: + violations.append((alias, message)) + return violations diff --git a/src/flake8_htk_rules/checks/debugger.py b/src/flake8_htk_rules/checks/debugger.py new file mode 100644 index 0000000..e02c1fc --- /dev/null +++ b/src/flake8_htk_rules/checks/debugger.py @@ -0,0 +1,91 @@ +"""Debugger prevention checks.""" + +from __future__ import annotations + +import ast +from dataclasses import dataclass, field + + +DB100 = "DB100 do not commit debugger imports" +DB101 = "DB101 do not commit debugger calls" + +DEBUGGER_MODULES = {"pdb", "ipdb", "pudb", "wdb"} +DEBUGGER_METHODS = { + "set_trace", + "post_mortem", + "pm", + "runcall", + "runctx", +} + + +@dataclass +class DebuggerState: + module_aliases: set[str] = field(default_factory=set) + call_aliases: set[str] = field(default_factory=set) + + +def check_import( + node: ast.Import, + state: DebuggerState, +) -> list[tuple[ast.AST, str]]: + violations = [] + for alias in node.names: + root_module = alias.name.split(".", 1)[0] + if root_module in DEBUGGER_MODULES: + state.module_aliases.add(alias.asname or root_module) + violations.append((alias, DB100)) + return violations + + +def check_import_from( + node: ast.ImportFrom, + state: DebuggerState, +) -> list[tuple[ast.AST, str]]: + root_module = (node.module or "").split(".", 1)[0] + if root_module not in DEBUGGER_MODULES: + return [] + + violations = [] + for alias in node.names: + violations.append((alias, DB100)) + if alias.name == "*": + state.call_aliases.update(DEBUGGER_METHODS) + elif alias.name in DEBUGGER_METHODS: + state.call_aliases.add(alias.asname or alias.name) + return violations + + +def check_call( + node: ast.Call, + state: DebuggerState, +) -> list[tuple[ast.AST, str]]: + name = _call_name(node.func) + if _is_debugger_call(name, state): + return [(node, DB101)] + return [] + + +def _call_name(node: ast.AST) -> str: + if isinstance(node, ast.Name): + return node.id + if isinstance(node, ast.Attribute): + parent_name = _call_name(node.value) + if parent_name: + return f"{parent_name}.{node.attr}" + return node.attr + return "" + + +def _is_debugger_call(name: str, state: DebuggerState) -> bool: + if name == "breakpoint" or name in state.call_aliases: + return True + if "." not in name: + return False + + module_name, method_name = name.rsplit(".", 1) + root_module = module_name.split(".", 1)[0] + return ( + method_name in DEBUGGER_METHODS + and root_module in DEBUGGER_MODULES | state.module_aliases + ) diff --git a/src/flake8_htk_rules/checks/naming.py b/src/flake8_htk_rules/checks/naming.py new file mode 100644 index 0000000..42929cb --- /dev/null +++ b/src/flake8_htk_rules/checks/naming.py @@ -0,0 +1,20 @@ +"""Naming precision checks.""" + +from __future__ import annotations + +import ast + + +NM100 = ( + "NM100 avoid get_ function prefix; choose a more precise verb " + "such as build_, calculate_, extract_, fetch_, look_up_, " + "retrieve_, format_, or transform_" +) + + +def check_function( + node: ast.FunctionDef | ast.AsyncFunctionDef, +) -> list[tuple[ast.AST, str]]: + if node.name.startswith("get_"): + return [(node, NM100)] + return [] diff --git a/src/flake8_htk_rules/checks/structured.py b/src/flake8_htk_rules/checks/structured.py new file mode 100644 index 0000000..d6b2340 --- /dev/null +++ b/src/flake8_htk_rules/checks/structured.py @@ -0,0 +1,84 @@ +"""Structured programming checks.""" + +from __future__ import annotations + +import ast +import os +from fnmatch import fnmatch + + +SP100 = ( + "SP100 function '{name}' has {count} return statements; " + "prefer a single return per function" +) +SP101 = ( + "SP101 return value should be a simple variable, attribute, or literal, " + "not a complex expression" +) + + +def should_check_file( + filename: str, + structured_programming_files: tuple[str, ...], +) -> bool: + if not structured_programming_files: + return False + + relative_path = os.path.relpath(filename, os.getcwd()).replace(os.sep, "/") + return any( + fnmatch(relative_path, pattern) + for pattern in structured_programming_files + ) + + +def check_function( + node: ast.FunctionDef | ast.AsyncFunctionDef, +) -> list[tuple[ast.AST, str]]: + violations: list[tuple[ast.AST, str]] = [] + returns = _collect_returns(node) + + if len(returns) > 1: + violations.append( + (node, SP100.format(name=node.name, count=len(returns))) + ) + + for return_node in returns: + if not _is_simple_return_value(return_node.value): + violations.append((return_node, SP101)) + + return violations + + +def _collect_returns( + function_node: ast.FunctionDef | ast.AsyncFunctionDef, +) -> list[ast.Return]: + collector = _ReturnCollector(function_node) + collector.visit(function_node) + return collector.returns + + +def _is_simple_return_value(value: ast.expr | None) -> bool: + return value is None or isinstance( + value, + (ast.Name, ast.Attribute, ast.Constant), + ) + + +class _ReturnCollector(ast.NodeVisitor): + def __init__(self, root: ast.FunctionDef | ast.AsyncFunctionDef) -> None: + self.root = root + self.returns: list[ast.Return] = [] + + def visit_Return(self, node: ast.Return) -> None: + self.returns.append(node) + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + if node is self.root: + self.generic_visit(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + if node is self.root: + self.generic_visit(node) + + def visit_Lambda(self, node: ast.Lambda) -> None: + return diff --git a/src/flake8_htk_rules/checks/types.py b/src/flake8_htk_rules/checks/types.py new file mode 100644 index 0000000..26ef7dc --- /dev/null +++ b/src/flake8_htk_rules/checks/types.py @@ -0,0 +1,12 @@ +"""Shared types for Hacktoolkit checks.""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, order=True) +class Violation: + line: int + column: int + message: str From 6ca6fb8bd51623d2eec72373c81c2be0eb208246 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Sat, 18 Apr 2026 21:16:20 -0700 Subject: [PATCH 8/8] Address review coverage and metadata --- .github/workflows/ci.yml | 1 + pyproject.toml | 2 +- src/flake8_htk_rules/checks/structured.py | 25 +++++++++-- tests/test_flake8_integration.py | 52 +++++++++++++++++++++++ tests/test_plugin.py | 22 ++++++++++ 5 files changed, 98 insertions(+), 4 deletions(-) create mode 100644 tests/test_flake8_integration.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c993ea3..006f8f0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,3 +21,4 @@ jobs: - run: python -m pip install --upgrade pip - run: python -m pip install -e ".[dev]" - run: python -m unittest discover -s tests -v + - run: python -m flake8 src tests diff --git a/pyproject.toml b/pyproject.toml index 993f0ed..e56604b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "flake8-htk-rules" version = "0.1.0" -description = "Hacktoolkit Flake8 rules for structured Python code and datetime clarity." +description = "Hacktoolkit Flake8 rules for structured Python, datetime clarity, debugger prevention, and naming precision." readme = "README.md" requires-python = ">=3.9" license = "MIT" diff --git a/src/flake8_htk_rules/checks/structured.py b/src/flake8_htk_rules/checks/structured.py index d6b2340..db7712e 100644 --- a/src/flake8_htk_rules/checks/structured.py +++ b/src/flake8_htk_rules/checks/structured.py @@ -58,12 +58,31 @@ def _collect_returns( def _is_simple_return_value(value: ast.expr | None) -> bool: - return value is None or isinstance( - value, - (ast.Name, ast.Attribute, ast.Constant), + return ( + value is None + or isinstance(value, (ast.Name, ast.Attribute)) + or _is_literal_value(value) ) +def _is_literal_value(value: ast.expr) -> bool: + if isinstance(value, ast.Constant): + return True + if isinstance(value, (ast.Tuple, ast.List, ast.Set)): + return all(_is_literal_value(element) for element in value.elts) + if isinstance(value, ast.Dict): + return all( + key is not None and _is_literal_value(key) + for key in value.keys + ) and all(_is_literal_value(item) for item in value.values) + if isinstance(value, ast.UnaryOp) and isinstance( + value.op, + (ast.UAdd, ast.USub), + ): + return _is_literal_value(value.operand) + return False + + class _ReturnCollector(ast.NodeVisitor): def __init__(self, root: ast.FunctionDef | ast.AsyncFunctionDef) -> None: self.root = root diff --git a/tests/test_flake8_integration.py b/tests/test_flake8_integration.py new file mode 100644 index 0000000..9813551 --- /dev/null +++ b/tests/test_flake8_integration.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + + +class Flake8IntegrationTest(unittest.TestCase): + def test_installed_flake8_entry_point_reports_rule_families(self) -> None: + repo_root = Path(__file__).resolve().parents[1] + source = """ +from datetime import datetime +import pdb + +def get_total(values): + if values: + pdb.set_trace() + return sum(values) + return 0 +""" + with tempfile.TemporaryDirectory() as tmpdir: + sample_path = Path(tmpdir) / "sample.py" + sample_path.write_text(source) + relative_path = os.path.relpath(sample_path, repo_root).replace( + os.sep, + "/", + ) + result = subprocess.run( + [ + sys.executable, + "-m", + "flake8", + str(sample_path), + "--select=SP,DT,DB,NM", + f"--structured-programming-files={relative_path}", + ], + cwd=repo_root, + check=False, + text=True, + capture_output=True, + ) + + self.assertEqual(result.returncode, 1, result.stdout + result.stderr) + for code in ("DT100", "DB100", "NM100", "SP100", "DB101", "SP101"): + self.assertIn(code, result.stdout) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_plugin.py b/tests/test_plugin.py index d4d8197..9890afc 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -87,6 +87,28 @@ def sample(value): self.assertEqual(messages, []) + def test_allows_literal_return_values_for_configured_file(self) -> None: + messages = run_plugin( + """ +def sample(): + return {"ok": [1, 2, (3, -4)]} +""", + structured_programming_files=["accounts/services.py"], + ) + + self.assertEqual(messages, []) + + def test_flags_container_return_with_nonliteral_values(self) -> None: + messages = run_plugin( + """ +def sample(value): + return [value] +""", + structured_programming_files=["accounts/services.py"], + ) + + self.assertEqual(messages, [SP101]) + def test_skips_structured_programming_for_unconfigured_file(self) -> None: messages = run_plugin( """