From 40a4b159eeb102b6a057f52545e09b7e5a29b7bd Mon Sep 17 00:00:00 2001 From: Vlad Mencl Date: Mon, 25 May 2026 17:34:11 +1200 Subject: [PATCH] new: add map_threads option for ThreadPool size in map pipe Allow configuring ThreadPool size used in map pipe. Passing the default value None makes ThreadPool use os.process_cpu_count(), keeping the existing behaviour. Setting the value to 1 provides a workaround for #315 (avoid parallelism). Given that most processing happens within Python without releasing the GIL, it actually does not incur any significant penalty. --- src/pyff/builtins.py | 4 ++-- src/pyff/constants.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/pyff/builtins.py b/src/pyff/builtins.py index 9c560956..211bf64e 100644 --- a/src/pyff/builtins.py +++ b/src/pyff/builtins.py @@ -22,7 +22,7 @@ from lxml.etree import DocumentInvalid from str2bool import str2bool -from pyff.constants import NS +from pyff.constants import NS, config from pyff.decorators import deprecated from pyff.exceptions import MetadataException from pyff.logs import get_log @@ -111,7 +111,7 @@ def _p(e): from multiprocessing.pool import ThreadPool - pool = ThreadPool() + pool = ThreadPool(config.map_threads) result = pool.map(_p, iter_entities(req.t), chunksize=10) log.info(f"processed {len(result)} entities") diff --git a/src/pyff/constants.py b/src/pyff/constants.py index acdc9063..b9799b30 100644 --- a/src/pyff/constants.py +++ b/src/pyff/constants.py @@ -344,6 +344,10 @@ class Config: "info_buffer_size", default=10, typeconv=as_int, info="how much history to keep about each metadata URL" ) + map_threads = S( + "map_threads", typeconv=as_int, info="how many threads to create in map pipe [default: os.process_cpu_count() ]" + ) + worker_pool_size = S( "worker_pool_size", default=1, cmdline=['pyffd'], typeconv=as_int, info="how many gunicorn workers to run" )