Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions workers/shared/infrastructure/config/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,27 +323,26 @@ def _verify_pluggable_worker_exists(worker_type: WorkerType) -> bool:

try:
import importlib
from pathlib import Path

# Check if the worker.py file exists
# Path resolution: builder.py is at /app/workers/shared/infrastructure/config/
# We need to get to /app/workers/, so go up 3 levels
pluggable_worker_path = (
Path(__file__).resolve().parents[3]
/ "pluggable_worker"
/ worker_type.value
/ "worker.py"
)
import importlib.util
Comment thread
greptile-apps[bot] marked this conversation as resolved.

# Ask Python's import system whether the module is resolvable.
# find_spec() consults all registered finders and handles every
# module representation (source .py, Nuitka/Cython .so, .pyc,
# namespace packages, zipimports) — unlike a filesystem check
# for a specific file extension, which breaks for compiled plugins.
module_path = f"pluggable_worker.{worker_type.value}.worker"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hope this not break running worker locally insated of docker

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested locally — doesn't break local runs. find_spec() is actually a superset of the previous filesystem check.

Local runs go through workers/run-worker.sh, which sets export PYTHONPATH="$WORKERS_DIR:${PYTHONPATH:-}" (line 702) before launching celery. So import pluggable_worker.<name>.worker resolves the same way the launcher itself uses the dotted module name at line 444 (celery_app_module="pluggable_worker.${worker_type}.worker").

Coverage comparison vs old code:

Scenario Old (Path(...)/worker.py) New (find_spec)
Local .py via run-worker.sh ✅ (PYTHONPATH set by script)
Docker with uncompiled plugin
Docker with compiled .so (Cython/Nuitka) ❌ (checks .py)
OSS with no pluggable plugins n/a — is_pluggable() returns False, early return same
Bare python worker.py from repo root, no PYTHONPATH ❌ (and would fail at actual import too)

The only way someone could get a false negative from the new code but not the old is if they ran a pluggable worker outside run-worker.sh without any PYTHONPATH setup — in that case the old check returned True but celery would have crashed on the real import a few lines later. So net change: we fail earlier with a cleaner log instead of getting a stack trace.

Also confirmed on snapshot.180420262 locally: pluggable workers (agentic_callback + bulk_download worker.so) register and run fine — previously they crash-looped because Path(...)/worker.py returned False for compiled modules.


if not pluggable_worker_path.exists():
logger.error(f"Pluggable worker file not found: {pluggable_worker_path}")
if importlib.util.find_spec(module_path) is None:
Comment thread
greptile-apps[bot] marked this conversation as resolved.
logger.error(f"Pluggable worker module not importable: {module_path}")
return False

# Try to import the module to verify it's valid
module_path = f"pluggable_worker.{worker_type.value}.worker"
# Load it to catch findable-but-broken modules (e.g. import errors
# inside worker.py that find_spec wouldn't surface).
importlib.import_module(module_path)

except ImportError:
except (ImportError, ValueError):
# ValueError: find_spec raises this when sys.modules[module_path] is
# populated but has __spec__ = None (from a prior failed import).
logger.exception(f"Failed to import pluggable worker {worker_type.value}")
return False
except (OSError, AttributeError):
Expand Down
6 changes: 4 additions & 2 deletions workers/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,8 +448,10 @@ def on_task_postrun(sender=None, task_id=None, **kwargs):
worker_directory = os.path.join("pluggable_worker", worker_type.value)
worker_path = os.path.join(base_dir, worker_directory)
else:
# Core workers use their value directly (with hyphens converted to underscores where needed)
worker_directory = worker_type.value.replace("-", "_")
# Enum values use underscores (Python module names); a few on-disk dirs
# still use hyphens (e.g. api-deployment). Derive the directory from the
# authoritative import-path map on WorkerType instead of a blind replace.
worker_directory = worker_type.to_import_path().rsplit(".", 1)[0]
worker_path = os.path.join(base_dir, worker_directory)

# Add worker directory to path for task imports
Expand Down