diff --git a/tests/testthat/test-boundary-detection-enhanced.R b/tests/testthat/test-boundary-detection-enhanced.R index dec2c7c..fe3a7cd 100644 --- a/tests/testthat/test-boundary-detection-enhanced.R +++ b/tests/testthat/test-boundary-detection-enhanced.R @@ -81,23 +81,21 @@ test_that("boundary detection handles large coefficients", { test_that("boundary detection handles large standard errors", { # Create data designed to have large SEs WITHOUT boundary issues + # Use random binary outcome (not alternating) to avoid accidental separation + # Use many confounders relative to sample size to generate large SEs set.seed(789) n <- 50 # Smaller sample for larger SEs test_data_large_se <- data.frame( outcome = rbinom(n, 1, 0.5), # Moderate probability, not near boundaries - exposure = factor(sample(c("No", "Yes"), n, replace = TRUE, prob = c(0.5, 0.5))), - confounder = rnorm(n) + exposure = factor(sample(c("No", "Yes"), n, replace = TRUE)), + x1 = rnorm(n), x2 = rnorm(n), x3 = rnorm(n), x4 = rnorm(n) ) - # Ensure we're not creating boundary conditions - # Check baseline risks are away from 0 and 1 - table(test_data_large_se$outcome, test_data_large_se$exposure) - boundary_large_se <- calc_risk_diff( data = test_data_large_se, outcome = "outcome", exposure = "exposure", - adjust_vars = "confounder" # Adjustment with small n creates large SEs + adjust_vars = c("x1", "x2", "x3", "x4") # Many confounders relative to sample size ) # The test should check for EITHER large SEs OR boundary near @@ -110,14 +108,15 @@ test_that("boundary detection handles large standard errors", { test_that("boundary detection specifically identifies large standard errors", { # Create a scenario where SEs are large but probs are moderate - # Use a very small sample with balanced outcome + # Use random binary outcome to avoid accidental perfect separation + # Use many confounders relative to sample size to reliably generate large SEs set.seed(999) - n <- 20 # Very small sample + n <- 50 # Small sample with many confounders test_data <- data.frame( - exposure = factor(rep(c("No", "Yes"), each = n/2)), - outcome = rep(c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1), 2), # Alternating, moderate mean + outcome = rbinom(n, 1, 0.5), # Random binary, not perfect separation + exposure = factor(sample(c("No", "Yes"), n, replace = TRUE)), x1 = rnorm(n), x2 = rnorm(n), x3 = rnorm(n), @@ -134,7 +133,7 @@ test_that("boundary detection specifically identifies large standard errors", { ) }) - # With 20 observations and 6 parameters, we should get large SEs + # With 50 observations and 6 parameters, we should get large SEs # Check the standard error is actually large if (result$boundary_type != "large_standard_errors") { # If boundary type is different, check if SE is still large diff --git a/tests/testthat/test-calc-risk-diff.R b/tests/testthat/test-calc-risk-diff.R index b3cfe84..5d87f7a 100644 --- a/tests/testthat/test-calc-risk-diff.R +++ b/tests/testthat/test-calc-risk-diff.R @@ -40,7 +40,7 @@ create_cachar_inspired_data <- function(n = 1500, seed = 2025) { ) %>% dplyr::mutate( # Age structure matching Northeast India patterns - age = sample(18:70, n, replace = TRUE, + age = sample(18:90, n, replace = TRUE, prob = c(rep(0.8, 22), rep(1.2, 30), rep(0.6, 21))), # Sex distribution (male predominant in screening studies)