Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/harden-pregnancy-takeup.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Harden CPS pregnancy take-up rates to use the build year and fall back across nearby CDC and ACS vintages.
5 changes: 4 additions & 1 deletion policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,10 @@ def add_takeup(self):
get_state_pregnancy_rates,
)

pregnancy_rates = get_state_pregnancy_rates()
pregnancy_rates = get_state_pregnancy_rates(
cdc_year=self.time_period,
acs_year=self.time_period,
)
national_rate = 0.041 # fallback
pregnancy_rate_by_person = np.array(
[pregnancy_rates.get(s, national_rate) for s in person_states]
Expand Down
40 changes: 36 additions & 4 deletions policyengine_us_data/db/etl_pregnancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
)
from policyengine_us_data.utils.census import STATE_ABBREV_TO_FIPS
from policyengine_us_data.utils.db import (
DEFAULT_YEAR,
get_geographic_strata,
etl_argparser,
)
Expand Down Expand Up @@ -395,8 +396,8 @@ def load_pregnancy_data(


def get_state_pregnancy_rates(
cdc_year: int = 2023,
acs_year: int = 2023,
cdc_year: int = DEFAULT_YEAR,
acs_year: int = DEFAULT_YEAR,
) -> dict:
"""Return {state_abbrev: pregnancy_rate} for use by cps.py.

Expand All @@ -413,8 +414,39 @@ def get_state_pregnancy_rates(
rate (probability that a woman aged 15-44 is currently
pregnant).
"""
births_df = extract_cdc_births(cdc_year)
pop_df = extract_female_population(acs_year)
births_df = None
birth_errors = []
for candidate_cdc_year in [cdc_year, cdc_year - 1]:
try:
births_df = extract_cdc_births(candidate_cdc_year)
break
except Exception as e:
birth_errors.append(f"{candidate_cdc_year}: {e}")
logger.warning(
f"CDC VSRR {candidate_cdc_year} not available for take-up: {e}"
)
if births_df is None:
raise RuntimeError(
"No CDC VSRR birth data for pregnancy take-up rates. "
f"Tried {cdc_year} and {cdc_year - 1}: {'; '.join(birth_errors)}"
)

pop_df = None
population_errors = []
for candidate_acs_year in [acs_year, acs_year - 1, acs_year - 2]:
try:
pop_df = extract_female_population(candidate_acs_year)
break
except Exception as e:
population_errors.append(f"{candidate_acs_year}: {e}")
logger.warning(f"ACS {candidate_acs_year} not available for take-up: {e}")
if pop_df is None:
raise RuntimeError(
"No ACS female population data for pregnancy take-up rates. "
f"Tried {acs_year}, {acs_year - 1}, and {acs_year - 2}: "
f"{'; '.join(population_errors)}"
)

df = transform_pregnancy_data(births_df, pop_df)
return dict(zip(df["state_abbrev"], df["pregnancy_rate"]))

Expand Down
36 changes: 36 additions & 0 deletions tests/unit/db/test_etl_pregnancy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

from policyengine_us_data.db import etl_pregnancy


Expand Down Expand Up @@ -124,3 +126,37 @@ def fake_get(url, **kwargs):
),
]
assert saved == [("census_b01001_female_15_44_2023.json", b01001_payload())]


def test_get_state_pregnancy_rates_falls_back_to_available_years(monkeypatch):
    """Check that the rate lookup retries earlier vintages when a year fails.

    The CDC extractor is stubbed to raise for 2024 and the ACS extractor to
    raise for 2024 and 2023, so a request for 2024 has to settle on CDC 2023
    and ACS 2022. Both the order of attempts and the resulting rate are
    asserted.
    """
    calls = []

    def fake_extract_cdc_births(year):
        # Record every attempt so the retry order can be asserted below.
        calls.append(("births", year))
        if year == 2024:
            raise RuntimeError("CDC unavailable")
        return pd.DataFrame({"state_abbrev": ["AL"], "births": [52_000]})

    def fake_extract_female_population(year):
        calls.append(("population", year))
        if year in (2024, 2023):
            raise RuntimeError("ACS unavailable")
        return pd.DataFrame({"state_abbrev": ["AL"], "female_15_44": [1_000_000]})

    monkeypatch.setattr(etl_pregnancy, "extract_cdc_births", fake_extract_cdc_births)
    monkeypatch.setattr(
        etl_pregnancy,
        "extract_female_population",
        fake_extract_female_population,
    )

    rates = etl_pregnancy.get_state_pregnancy_rates(cdc_year=2024, acs_year=2024)

    # Births are tried first (2024, then 2023); population is tried only after
    # births succeed, walking back from 2024 to 2022.
    assert calls == [
        ("births", 2024),
        ("births", 2023),
        ("population", 2024),
        ("population", 2023),
        ("population", 2022),
    ]
    # The rate comes out of the real transform_pregnancy_data (not stubbed
    # here), so compare the float with a tolerance instead of exact equality;
    # the set of states is still checked exactly.
    assert set(rates) == {"AL"}
    assert abs(rates["AL"] - 0.039) < 1e-9