diff --git a/diff_diff/bacon.py b/diff_diff/bacon.py index a2314086..1ef55b7d 100644 --- a/diff_diff/bacon.py +++ b/diff_diff/bacon.py @@ -1085,6 +1085,7 @@ def bacon_decompose( Use 0 (or np.inf) for never-treated units. weights : str, default="approximate" Weight calculation method: + - "approximate": Fast simplified formula (default). Good for diagnostic purposes where relative weights are sufficient. - "exact": Variance-based weights from Goodman-Bacon (2021) @@ -1094,6 +1095,7 @@ def bacon_decompose( ------- BaconDecompositionResults Object containing decomposition results with: + - twfe_estimate: The overall TWFE coefficient - comparisons: List of all 2x2 comparisons with estimates and weights - Weight totals by comparison type diff --git a/diff_diff/chaisemartin_dhaultfoeuille_results.py b/diff_diff/chaisemartin_dhaultfoeuille_results.py index 6ddfb0d6..e22262f4 100644 --- a/diff_diff/chaisemartin_dhaultfoeuille_results.py +++ b/diff_diff/chaisemartin_dhaultfoeuille_results.py @@ -701,7 +701,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """SE / |DID_M|; NaN when DID_M is 0 or SE non-finite.""" + """SE / abs(DID_M); NaN when DID_M is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/continuous_did_results.py b/diff_diff/continuous_did_results.py index f87f9c65..f3fed115 100644 --- a/diff_diff/continuous_did_results.py +++ b/diff_diff/continuous_did_results.py @@ -156,7 +156,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_att_se) and self.overall_att_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/diagnostic_report.py b/diff_diff/diagnostic_report.py index f5162038..f9a21940 100644 --- a/diff_diff/diagnostic_report.py +++ b/diff_diff/diagnostic_report.py @@ -256,10 +256,9 @@ class DiagnosticReport: Column names identifying the panel structure. pre_periods, post_periods : list, optional Explicit pre- and post-treatment period labels. - run_parallel_trends, run_sensitivity, run_placebo, run_bacon, - run_design_effect, run_heterogeneity, run_epv, run_pretrends_power : bool + run_parallel_trends, run_sensitivity, run_placebo, run_bacon, run_design_effect, run_heterogeneity, run_epv, run_pretrends_power : bool Per-check opt-in flags. ``run_placebo`` defaults to ``False`` (opt-in, - expensive, currently not implemented — placebo key remains reserved + expensive, currently not implemented - placebo key remains reserved as ``skipped`` in the schema). All other checks default to ``True`` and are further gated by estimator-type and instance-level applicability (see ``docs/methodology/REPORTING.md``). diff --git a/diff_diff/efficient_did_results.py b/diff_diff/efficient_did_results.py index 13123ea8..2e6e0463 100644 --- a/diff_diff/efficient_did_results.py +++ b/diff_diff/efficient_did_results.py @@ -180,7 +180,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/had.py b/diff_diff/had.py index 6a717bcb..c971f85e 100644 --- a/diff_diff/had.py +++ b/diff_diff/had.py @@ -233,6 +233,7 @@ class HeterogeneousAdoptionDiDResults: ``(Ybar_{Z=1} - Ybar_{Z=0}) / (Dbar_{Z=1} - Dbar_{Z=0})``. se : float Standard error on the beta-scale. For continuous designs: + - Unweighted or ``weights=``: CCT-2014 weighted-robust SE from Phase 1c divided by ``|den|`` (``den`` = raw or weighted denominator depending on fit path). @@ -241,6 +242,7 @@ class HeterogeneousAdoptionDiDResults: aligned with ``tau_bc``) routed through :func:`compute_survey_if_variance` for PSU-aggregated, FPC/strata-adjusted variance, divided by ``|den|``. + In both cases the higher-order variance from ``mean(ΔY)`` is dominated by the nonparametric boundary estimate in large samples and is not included in the leading-order formula. For diff --git a/diff_diff/honest_did.py b/diff_diff/honest_did.py index 6730618c..110e0261 100644 --- a/diff_diff/honest_did.py +++ b/diff_diff/honest_did.py @@ -40,7 +40,10 @@ class DeltaSD: Smoothness restriction on trend violations (Delta^{SD}). Restricts the second differences of the trend violations: - |delta_{t+1} - 2*delta_t + delta_{t-1}| <= M + + .. math:: + + |\\delta_{t+1} - 2\\delta_t + \\delta_{t-1}| \\le M When M=0, this enforces that violations follow a linear trend (linear extrapolation of pre-trends). Larger M allows more @@ -75,7 +78,10 @@ class DeltaRM: Post-treatment consecutive first differences are bounded by Mbar times the maximum pre-treatment first difference: - |delta_{t+1} - delta_t| <= Mbar * max_{s<0} |delta_{s+1} - delta_s| + + .. math:: + + |\\delta_{t+1} - \\delta_t| \\le \\overline{M} \\cdot \\max_{s<0} |\\delta_{s+1} - \\delta_s| When Mbar=0, this enforces zero post-treatment first differences. Mbar=1 means post-period first differences can be as large as the @@ -109,8 +115,9 @@ class DeltaSDRM: Combined smoothness and relative magnitudes restriction. Imposes both: - 1. Smoothness: |delta_{t+1} - 2*delta_t + delta_{t-1}| <= M - 2. Relative magnitudes: |delta_{t+1} - delta_t| <= Mbar * max_{s<0} |delta_{s+1} - delta_s| + + 1. Smoothness: :math:`|\\delta_{t+1} - 2\\delta_t + \\delta_{t-1}| \\le M` + 2. Relative magnitudes: :math:`|\\delta_{t+1} - \\delta_t| \\le \\overline{M} \\cdot \\max_{s<0} |\\delta_{s+1} - \\delta_s|` This is more restrictive than either constraint alone. diff --git a/diff_diff/imputation.py b/diff_diff/imputation.py index b5454c6f..b24a482b 100644 --- a/diff_diff/imputation.py +++ b/diff_diff/imputation.py @@ -75,7 +75,7 @@ class ImputationDiD(ImputationDiDBootstrapMixin): - "silent": Drop columns silently horizon_max : int, optional Maximum event-study horizon. If set, event study effects are only - computed for |h| <= horizon_max. + computed for abs(h) <= horizon_max. aux_partition : str, default="cohort_horizon" Controls the auxiliary model partition for Theorem 3 variance: - "cohort_horizon": Groups by cohort x relative time (tightest SEs) diff --git a/diff_diff/imputation_results.py b/diff_diff/imputation_results.py index e7f7613c..95260b9e 100644 --- a/diff_diff/imputation_results.py +++ b/diff_diff/imputation_results.py @@ -104,9 +104,9 @@ class ImputationDiDResults: n_obs : int Total number of observations. n_treated_obs : int - Number of treated observations (|Omega_1|). + Number of treated observations (:math:`|\\Omega_1|`). n_untreated_obs : int - Number of untreated observations (|Omega_0|). + Number of untreated observations (:math:`|\\Omega_0|`). n_treated_units : int Number of ever-treated units. n_control_units : int @@ -155,7 +155,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/prep.py b/diff_diff/prep.py index e942cab9..3c47832c 100644 --- a/diff_diff/prep.py +++ b/diff_diff/prep.py @@ -834,6 +834,7 @@ def rank_control_units( ------- pd.DataFrame Ranked control units with columns: + - unit: Unit identifier - quality_score: Combined quality score (0-1, higher is better) - outcome_trend_score: Pre-treatment outcome trend similarity @@ -846,6 +847,7 @@ def rank_control_units( - is_required: Whether unit was in require_units If suggest_treatment_candidates=True (and no treated units): + - unit: Unit identifier - treatment_candidate_score: Suitability as treatment unit - avg_outcome_level: Pre-treatment outcome mean diff --git a/diff_diff/results.py b/diff_diff/results.py index 69d48b8f..47c1cf8c 100644 --- a/diff_diff/results.py +++ b/diff_diff/results.py @@ -141,7 +141,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.se) and self.se >= 0): return np.nan if not np.isfinite(self.att) or self.att == 0: @@ -468,7 +468,7 @@ def post_period_effects(self) -> Dict[Any, PeriodEffect]: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.avg_se) and self.avg_se >= 0): return np.nan if not np.isfinite(self.avg_att) or self.avg_att == 0: @@ -919,7 +919,7 @@ def __getstate__(self) -> Dict[str, Any]: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.se) and self.se >= 0): return np.nan if not np.isfinite(self.att) or self.att == 0: @@ -1114,14 +1114,16 @@ def get_loo_effects_df(self) -> pd.DataFrame: full-design survey jackknife path, which uses PSU-level LOO). Available on: + * non-survey jackknife fits (classical Arkhangelsky Algorithm 3). * pweight-only survey jackknife fits (Algorithm 3 with post-hoc ω_eff composition; PSU labels in ``survey_metadata`` come from implicit-PSU metadata but the LOO remains unit-level). Blocked on: + * full-design survey jackknife fits (strata / PSU / FPC set in - ``SurveyDesign``) — the underlying replicates are PSU-level + ``SurveyDesign``) - the underlying replicates are PSU-level ``τ̂_{(h,j)}`` (Rust & Rao 1996), not unit-level. See ``result.placebo_effects`` for the raw PSU-level replicate array and REGISTRY §SyntheticDiD "Note (survey + jackknife @@ -1142,10 +1144,12 @@ def get_loo_effects_df(self) -> pd.DataFrame: ------- pd.DataFrame Columns: - - ``unit`` — user's unit ID - - ``role`` — ``'control'`` or ``'treated'`` - - ``att_loo`` — ATT with this unit dropped - - ``delta_from_full`` — ``att_loo - self.att`` + + - ``unit`` - user's unit ID + - ``role`` - ``'control'`` or ``'treated'`` + - ``att_loo`` - ATT with this unit dropped + - ``delta_from_full`` - ``att_loo - self.att`` + Sorted by ``|delta_from_full|`` descending, NaN rows at the end. """ if self.variance_method != "jackknife": diff --git a/diff_diff/stacked_did_results.py b/diff_diff/stacked_did_results.py index fb5bfb96..7145d86c 100644 --- a/diff_diff/stacked_did_results.py +++ b/diff_diff/stacked_did_results.py @@ -109,7 +109,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py index ec596647..a57e8a27 100644 --- a/diff_diff/staggered.py +++ b/diff_diff/staggered.py @@ -175,16 +175,19 @@ class CallawaySantAnna( Random seed for reproducibility. rank_deficient_action : str, default="warn" Action when design matrix is rank-deficient (linearly dependent columns): + - "warn": Issue warning and drop linearly dependent columns (default) - "error": Raise ValueError - "silent": Drop columns silently without warning base_period : str, default="varying" Method for selecting the base (reference) period for computing ATT(g,t). Options: + - "varying": For pre-treatment periods (t < g - anticipation), use t-1 as base (consecutive comparisons). For post-treatment, use g-1-anticipation. Requires t-1 to exist in data. - "universal": Always use g-1-anticipation as base period. + Both produce identical post-treatment effects. Matches R's did::att_gt() base_period parameter. cband : bool, default=True @@ -217,12 +220,14 @@ class CallawaySantAnna( pscore_fallback : str, default="error" Action when propensity score estimation fails entirely (``LinAlgError`` or ``ValueError`` from IRLS): + - "error": Raise the exception (default). Ensures the user is aware of estimation failures. - "unconditional": Fall back to unconditional propensity with a warning. For IPW, this drops all covariates. For DR, the propensity model becomes unconditional but outcome regression still uses covariates. + When ``rank_deficient_action="error"``, errors are always re-raised regardless of this setting. diff --git a/diff_diff/staggered_results.py b/diff_diff/staggered_results.py index 9c8f5275..c1f4174b 100644 --- a/diff_diff/staggered_results.py +++ b/diff_diff/staggered_results.py @@ -150,7 +150,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/staggered_triple_diff_results.py b/diff_diff/staggered_triple_diff_results.py index 6ffc0738..6096e3dd 100644 --- a/diff_diff/staggered_triple_diff_results.py +++ b/diff_diff/staggered_triple_diff_results.py @@ -107,7 +107,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/sun_abraham.py b/diff_diff/sun_abraham.py index f3c78f8e..6ad113f8 100644 --- a/diff_diff/sun_abraham.py +++ b/diff_diff/sun_abraham.py @@ -105,7 +105,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py index dc7ae03c..209ce7b4 100644 --- a/diff_diff/synthetic_did.py +++ b/diff_diff/synthetic_did.py @@ -35,6 +35,7 @@ class SyntheticDiD(DifferenceInDifferences): pre-treatment trends. This method is particularly useful when: + - You have few treated units (possibly just one) - Parallel trends assumption may be questionable - Control units are heterogeneous and need reweighting @@ -52,6 +53,7 @@ class SyntheticDiD(DifferenceInDifferences): Significance level for confidence intervals. variance_method : str, default="placebo" Method for variance estimation: + - "placebo": Placebo-based variance matching R's synthdid::vcov(method="placebo"). Implements Algorithm 4 from Arkhangelsky et al. (2021). Library default (R's default is ``"bootstrap"``; we default to placebo because it is diff --git a/diff_diff/triple_diff.py b/diff_diff/triple_diff.py index 965f13a8..958903c2 100644 --- a/diff_diff/triple_diff.py +++ b/diff_diff/triple_diff.py @@ -365,6 +365,7 @@ class TripleDifference: ---------- estimation_method : str, default="dr" Estimation method to use: + - "dr": Doubly robust (recommended). Consistent if either the outcome model or propensity score model is correctly specified. - "reg": Regression adjustment (outcome regression). @@ -385,6 +386,7 @@ class TripleDifference: or above (1 - pscore_trim) are clipped to avoid extreme weights. rank_deficient_action : str, default="warn" Action when design matrix is rank-deficient (linearly dependent columns): + - "warn": Issue warning and drop linearly dependent columns (default) - "error": Raise ValueError - "silent": Drop columns silently without warning @@ -397,11 +399,13 @@ class TripleDifference: (1996). Only applies to IPW and DR estimation methods. pscore_fallback : str, default="error" Action when propensity score estimation fails: + - "error": Raise the exception (default) - "unconditional": Fall back to unconditional propensity with a warning. For IPW, drops all covariates. For DR, the propensity model becomes unconditional but outcome regression still uses covariates. + When ``rank_deficient_action="error"``, errors are always re-raised regardless of this setting. diff --git a/diff_diff/trop.py b/diff_diff/trop.py index 76a44b43..d4bd1840 100644 --- a/diff_diff/trop.py +++ b/diff_diff/trop.py @@ -53,7 +53,7 @@ class TROP(TROPLocalMixin, TROPGlobalMixin): 2. **Exponential distance-based unit weights**: ω_j = exp(-λ_unit × d(j,i)) where d(j,i) is the RMSE of outcome differences between units - 3. **Exponential time decay weights**: θ_s = exp(-λ_time × |s-t|) + 3. **Exponential time decay weights**: θ_s = exp(-λ_time × :math:`|s-t|`) weighting pre-treatment periods by proximity to treatment Tuning parameters (λ_time, λ_unit, λ_nn) are selected via leave-one-out diff --git a/diff_diff/trop_results.py b/diff_diff/trop_results.py index 66d85d8c..2d3d8535 100644 --- a/diff_diff/trop_results.py +++ b/diff_diff/trop_results.py @@ -162,7 +162,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.se) and self.se >= 0): return np.nan if not np.isfinite(self.att) or self.att == 0: diff --git a/diff_diff/twfe.py b/diff_diff/twfe.py index 05d0d10d..7409b621 100644 --- a/diff_diff/twfe.py +++ b/diff_diff/twfe.py @@ -602,6 +602,7 @@ def decompose( Use 0 (or np.inf) for never-treated units. weights : str, default="approximate" Weight calculation method: + - "approximate": Fast simplified formula (default). Good for diagnostic purposes where relative weights are sufficient. - "exact": Variance-based weights from Goodman-Bacon (2021) diff --git a/diff_diff/two_stage.py b/diff_diff/two_stage.py index 1522c7cd..9fe3991f 100644 --- a/diff_diff/two_stage.py +++ b/diff_diff/two_stage.py @@ -86,7 +86,7 @@ class TwoStageDiD(TwoStageDiDBootstrapMixin): - "silent": Drop columns silently horizon_max : int, optional Maximum event-study horizon. If set, event study effects are only - computed for |h| <= horizon_max. + computed for abs(h) <= horizon_max. pretrends : bool, default=False If True, event study includes pre-treatment horizons for visual pre-trends assessment. Pre-period effects should be ~0 under diff --git a/diff_diff/two_stage_results.py b/diff_diff/two_stage_results.py index d7cf7c8c..96a013bb 100644 --- a/diff_diff/two_stage_results.py +++ b/diff_diff/two_stage_results.py @@ -153,7 +153,7 @@ def __repr__(self) -> str: @property def coef_var(self) -> float: - """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite.""" + """Coefficient of variation: SE / abs(overall ATT). NaN when ATT is 0 or SE non-finite.""" if not (np.isfinite(self.overall_se) and self.overall_se >= 0): return np.nan if not np.isfinite(self.overall_att) or self.overall_att == 0: diff --git a/docs/api/local_linear.rst b/docs/api/local_linear.rst index db4149b1..2b55a7dc 100644 --- a/docs/api/local_linear.rst +++ b/docs/api/local_linear.rst @@ -45,14 +45,10 @@ to ``int_0^1 k(u) du = 1/2``; the uniform kernel uses .. autofunction:: diff_diff.uniform_kernel -.. autodata:: diff_diff.KERNELS - :annotation: : dict[str, Callable[[np.ndarray], np.ndarray]] - :no-value: - - Mapping from kernel name (``"epanechnikov"`` / ``"triangular"`` / - ``"uniform"``) to its callable evaluator on ``[0, 1]``. Pass the name - string (not the callable) to ``local_linear_fit`` and - ``mse_optimal_bandwidth`` via their ``kernel=`` argument. +.. py:data:: diff_diff.KERNELS + :type: dict[str, Callable[[np.ndarray], np.ndarray]] + + Mapping from kernel name (``"epanechnikov"`` / ``"triangular"`` / ``"uniform"``) to its callable evaluator on ``[0, 1]``. Pass the name string (not the callable) to ``local_linear_fit`` and ``mse_optimal_bandwidth`` via their ``kernel=`` argument. .. autofunction:: diff_diff.kernel_moments diff --git a/docs/api/trop.rst b/docs/api/trop.rst index f167e295..8d1eea47 100644 --- a/docs/api/trop.rst +++ b/docs/api/trop.rst @@ -12,7 +12,7 @@ which combines three robustness components: 2. **Exponential distance-based unit weights**: ω_j = exp(-λ_unit × d(j,i)) where d(j,i) is the pairwise RMSE between units over pre-treatment periods -3. **Exponential time decay weights**: θ_s = exp(-λ_time × |t-s|) +3. **Exponential time decay weights**: θ_s = exp(-λ_time × :math:`|t-s|`) weighting periods by proximity to the specific treatment period t **When to use TROP:**