Merge pull request #400 from tidymodels/allow-sparse-docs

topepo · web-flow · commit 3247d4fc63b5 · 2021-01-12T12:55:47.000-05:00
Document `allow_sparse_x`
diff --git a/R/aaa_models.R b/R/aaa_models.R
@@ -352,7 +352,7 @@ check_interface_val <- function(x) {
 #'  a formula interface, typically some predictor preprocessing must
 #'  be conducted. `glmnet` is a good example of this.
 #'
-#'   There are three options that can be used for the encodings:
+#'   There are four options that can be used for the encodings:
 #'
 #'  `predictor_indicators` describes whether and how to create indicator/dummy
 #'  variables from factor predictors. There are three options: `"none"` (do not
@@ -369,10 +369,15 @@ check_interface_val <- function(x) {
 #'  intercept, `model.matrix()` computes a full set of indicators for the
 #'  _first_ factor variable, but an incomplete set for the remainder.
 #'
-#'  Finally, the option `remove_intercept` will remove the intercept column
+#'  Next, the option `remove_intercept` will remove the intercept column
 #'  _after_ `model.matrix()` is finished. This can be useful if the model
 #'  function (e.g. `lm()`) automatically generates an intercept.
 #'
+#'  Finally, `allow_sparse_x` specifies whether the model function can natively
+#'  accommodate a sparse matrix representation for predictors during fitting
+#'  and tuning.
+#'
+#'
 #' @references "How to build a parsnip model"
 #'  \url{https://www.tidymodels.org/learn/develop/models/}
 #' @examples
diff --git a/man/set_new_model.Rd b/man/set_new_model.Rd
diff --git a/tests/testthat/recipes_examples.RData b/tests/testthat/recipes_examples.RData
diff --git a/tests/testthat/test_boost_tree_C50.R b/tests/testthat/test_boost_tree_C50.R
@@ -153,8 +153,14 @@ test_that('argument checks for data dimensions', {
     set_engine("C5.0") %>%
     set_mode("classification")
 
-  f_fit  <- spec %>% fit(species ~ ., data = penguins)
-  xy_fit <- spec %>% fit_xy(x = penguins[, -1], y = penguins$species)
+  expect_warning(
+    f_fit  <- spec %>% fit(species ~ ., data = penguins),
+    "1000 samples were requested"
+  )
+  expect_warning(
+    xy_fit <- spec %>% fit_xy(x = penguins[, -1], y = penguins$species),
+    "1000 samples were requested"
+  )
 
   expect_equal(f_fit$fit$control$minCases,  nrow(penguins))
   expect_equal(xy_fit$fit$control$minCases, nrow(penguins))
diff --git a/tests/testthat/test_boost_tree_xgboost.R b/tests/testthat/test_boost_tree_xgboost.R
@@ -281,20 +281,20 @@ test_that('early stopping', {
     regex = NA
   )
 
- expect_warning(
+  expect_warning(
     reg_fit <-
       boost_tree(trees = 20, stop_iter = 30, mode = "regression") %>%
       set_engine("xgboost", validation = .1) %>%
       fit(mpg ~ ., data = mtcars[-(1:4), ]),
     regex = "`early_stop` was reduced to 19"
   )
- expect_error(
-   reg_fit <-
-     boost_tree(trees = 20, stop_iter = 0, mode = "regression") %>%
-     set_engine("xgboost", validation = .1) %>%
-     fit(mpg ~ ., data = mtcars[-(1:4), ]),
-   regex = "`early_stop` should be on"
- )
+  expect_error(
+    reg_fit <-
+      boost_tree(trees = 20, stop_iter = 0, mode = "regression") %>%
+      set_engine("xgboost", validation = .1) %>%
+      fit(mpg ~ ., data = mtcars[-(1:4), ]),
+    regex = "`early_stop` should be on"
+  )
 })
 
 
@@ -379,9 +379,14 @@ test_that('argument checks for data dimensions', {
   penguins_dummy <- model.matrix(species ~ ., data = penguins)
   penguins_dummy <- as.data.frame(penguins_dummy[, -1])
 
-  f_fit  <- spec %>% fit(species ~ ., data = penguins)
-  xy_fit <- spec %>% fit_xy(x = penguins_dummy, y = penguins$species)
-
+  expect_warning(
+    f_fit  <- spec %>% fit(species ~ ., data = penguins),
+    "1000 samples were requested"
+  )
+  expect_warning(
+    xy_fit <- spec %>% fit_xy(x = penguins_dummy, y = penguins$species),
+    "1000 samples were requested"
+  )
   expect_equal(f_fit$fit$params$colsample_bytree, 1)
   expect_equal(f_fit$fit$params$min_child_weight, nrow(penguins))
   expect_equal(xy_fit$fit$params$colsample_bytree, 1)
diff --git a/tests/testthat/test_linear_reg_keras.R b/tests/testthat/test_linear_reg_keras.R
@@ -51,8 +51,11 @@ test_that('model fitting', {
       ),
     regexp = NA
   )
-  fit1$elapsed <- fit2$elapsed
-  expect_equal(fit1, fit2)
+  expect_equal(
+    unlist(keras::get_weights(fit1$fit)),
+    unlist(keras::get_weights(fit2$fit)),
+    tolerance = .1
+  )
 
   expect_error(
     fit(
diff --git a/tests/testthat/test_logistic_reg_keras.R b/tests/testthat/test_logistic_reg_keras.R
@@ -64,8 +64,11 @@ test_that('model fitting', {
       ),
     regexp = NA
   )
-  fit1$elapsed <- fit2$elapsed
-  expect_equal(fit1, fit2)
+  expect_equal(
+    unlist(keras::get_weights(fit1$fit)),
+    unlist(keras::get_weights(fit2$fit)),
+    tolerance = .1
+  )
 
   expect_error(
     fit(
diff --git a/tests/testthat/test_multinom_reg_keras.R b/tests/testthat/test_multinom_reg_keras.R
@@ -60,8 +60,11 @@ test_that('model fitting', {
       ),
     regexp = NA
   )
-  fit1$elapsed <- fit2$elapsed
-  expect_equal(fit1, fit2)
+  expect_equal(
+    unlist(keras::get_weights(fit1$fit)),
+    unlist(keras::get_weights(fit2$fit)),
+    tolerance = .1
+  )
 
   expect_error(
     fit(
diff --git a/tests/testthat/test_varying.R b/tests/testthat/test_varying.R
@@ -5,8 +5,6 @@ library(dplyr)
 
 context("varying parameters")
 
-load(test_path("recipes_examples.RData"))
-
 test_that('main parsnip arguments', {
 
   mod_1 <- rand_forest() %>%
@@ -94,49 +92,6 @@ test_that('other parsnip arguments', {
   expect_equal(other_4, exp_4)
 })
 
-
-test_that('recipe parameters', {
-
-  # un-randomify the id names
-  rec_1_id <- rec_1
-  rec_1_id$steps[[1]]$id <- "center_1"
-  rec_1_id$steps[[2]]$id <- "knnimpute_1"
-  rec_1_id$steps[[3]]$id <- "pca_1"
-
-  rec_res_1 <- varying_args(rec_1_id)
-
-  exp_1 <- tibble(
-    name = c("K", "num", "threshold", "options"),
-    varying = c(TRUE, TRUE, FALSE, FALSE),
-    id = c("knnimpute_1", rep("pca_1", 3)),
-    type = rep("step", 4)
-  )
-
-  expect_equal(rec_res_1, exp_1)
-
-  # un-randomify the id names
-  rec_3_id <- rec_3
-  rec_3_id$steps[[1]]$id <- "center_1"
-  rec_3_id$steps[[2]]$id <- "knnimpute_1"
-  rec_3_id$steps[[3]]$id <- "pca_1"
-
-  rec_res_3 <- varying_args(rec_3_id)
-  exp_3 <- exp_1
-  exp_3$varying <- FALSE
-  expect_equal(rec_res_3, exp_3)
-
-  rec_res_4 <- varying_args(rec_4)
-
-  exp_4 <- tibble(
-    name = character(),
-    varying = logical(),
-    id = character(),
-    type = character()
-  )
-
-  expect_equal(rec_res_4, exp_4)
-})
-
 test_that("empty lists return FALSE - #131", {
   expect_equal(
     parsnip:::find_varying(list()),
@@ -164,33 +119,3 @@ test_that("varying() deeply nested in calls can be located - #134", {
     TRUE
   )
 })
-
-test_that("recipe steps with non-varying args error if specified as varying()", {
-
-  rec_bad_varying <- rec_1
-  rec_bad_varying$steps[[1]]$skip <- varying()
-
-  expect_error(
-    varying_args(rec_bad_varying),
-    "The following argument for a recipe step of type 'step_center' is not allowed to vary: 'skip'."
-  )
-})
-
-test_that("`full = FALSE` returns only varying arguments", {
-
-  x_spec <- rand_forest(min_n = varying())  %>%
-    set_engine("ranger", sample.fraction = varying())
-
-  x_rec <- rec_1
-
-  expect_equal(
-    varying_args(x_spec, full = FALSE)$name,
-    c("min_n", "sample.fraction")
-  )
-
-  expect_equal(
-    varying_args(x_rec, full = FALSE)$name,
-    c("K", "num")
-  )
-
-})