From b63378c53850a7a7b7cced56957c2ce5ec2c4465 Mon Sep 17 00:00:00 2001
From: Florian Oswald <florian.oswald@gmail.com>
Date: Fri, 13 Nov 2020 23:23:21 +0100
Subject: [PATCH 1/4] added bias variance tradeoff app

---
 .../bias_variance/app.R                       | 124 ++++++++++++++++++
 inst/tutorial/chapter2/chapter2.Rmd           |   3 +-
 2 files changed, 125 insertions(+), 2 deletions(-)
 create mode 100644 inst/shinys/bias_variance_tradeoff/bias_variance/app.R

diff --git a/inst/shinys/bias_variance_tradeoff/bias_variance/app.R b/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
new file mode 100644
index 0000000..99edd2b
--- /dev/null
+++ b/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
@@ -0,0 +1,124 @@
+#
+# This is a Shiny web application. You can run the application by clicking
+# the 'Run App' button above.
+#
+# Find out more about building applications with Shiny here:
+#
+#    http://shiny.rstudio.com/
+#
+
+library(shiny)
+library(ggplot2)
+library(dplyr)
+library(gghighlight)
+library(cowplot)
+library(tidyr)
+
+getmodels <- function(x,y,newx,dfs = 2:20){
+    r = data.frame(x=x,y=y)
+    o = data.frame(x=newx)
+    s = list()
+    # browser()
+    for (i in 1:length(dfs)){
+        if (dfs[i] == 2){
+            s[[i]] <- lm(y~x,r)
+            o = cbind(o, predict(s[[i]], newdata = o))
+        } else {
+            s[[i]] <- smooth.spline(x,y,df = dfs[i])
+            o = cbind(o, predict(s[[i]], o$x)$y)
+        }
+    }
+    names(o)[-c(1)] <- paste0("df",dfs)
+    names(s) <- paste0("df",dfs)
+    list(models = s, pred = o, original = r)
+}
+
+data_alldf <- function(fun = function(x) {x*sin(x-2) + 0.2*x},n=90,ub = 5,nnew = 200,maxdf=60){
+    set.seed(1234)
+    eps = 1 # hard code variance
+    
+    r = data.frame(x = seq(0,ub,length.out = n))
+    r$truth = fun(r$x) 
+    r$epsi = rnorm(n,mean = 0, sd = eps)
+    r$y = r$truth + r$epsi
+    
+    mods = getmodels(r$x,r$y,seq(0,ub, length.out = nnew),dfs = seq(2,maxdf,by = 2))
+    # add test data to predictions
+    mods$pred$truth = fun(mods$pred$x)
+    mods$pred$testdata = mods$pred$truth + rnorm(nnew,mean = 0, sd = eps)
+    mods$plotdata = mods$pred %>% 
+        select(-testdata) %>%
+        tidyr::pivot_longer(-x,names_to = "flexibility")
+    
+    # mses and bias
+    mses = list(
+        train = colMeans(sapply(mods$models,residuals)^2)
+    ) # test mses
+    mses$test <- colMeans((mods$pred[,names(mods$models)] - mods$pred[,"testdata"])^2)
+    
+    # bias
+    mses$bias <- colMeans((mods$pred[,names(mods$models)] - mods$pred[,"truth"])^2)
+    mses$var <- diag(var(mods$pred[,names(mods$models)]))
+    
+    mses$plotdata <- data.frame(mses)
+    mses$plotdata$flexibility = seq(2,maxdf,by = 2)
+    mses$plotdata = mses$plotdata %>% tidyr::pivot_longer(-flexibility, names_to = "variable")
+    
+    
+    list(models = mods,mses = mses)
+    
+}
+
+
+
+plot_singledf <- function(x,df){
+    d = x$models$plotdata
+    m = x$mses$plotdata %>% filter(variable %in% c("test","train"))
+    
+    p = ggplot(d %>% filter(flexibility != "truth"))
+    p = p + geom_line(aes(x = x, y = value, color = flexibility), size = 1) + gghighlight(flexibility == paste0("df",df))
+    p = p + geom_line(data = d %>% filter(flexibility == "truth"), aes(x=x,y = value), color = "black", size = 1)
+    p = p + geom_point(data = x$models$original, aes(x,y), shape = 1,size = 2) + theme_bw()
+    
+    b = ggplot(m, aes(x=flexibility,y = value, color = variable)) + 
+        geom_point() + gghighlight(flexibility == df)
+    b = b + geom_hline(yintercept = 1, linetype = "dashed", color = "grey")+ theme_bw()
+    cowplot::plot_grid(p,b)
+}
+
+
+# Define UI for application that draws a histogram
+ui <- fluidPage(
+
+    # Application title
+    titlePanel("Bias Variance Tradeoff"),
+
+    # Sidebar with a slider input for number of bins 
+    sidebarLayout(
+        sidebarPanel(
+            sliderInput("df",
+                        "Degrees of Freedom:",
+                        min = 2,
+                        max = 60,
+                        step = 2,
+                        value = 2)
+        ),
+
+        # Show a plot of the generated distribution
+        mainPanel(
+           plotOutput("dfPlot")
+        )
+    )
+)
+
+# Define server logic required to draw a histogram
+server <- function(input, output) {
+    d = data_alldf()
+
+    output$dfPlot <- renderPlot({
+        plot_singledf(d,input$df)
+    })
+}
+
+# Run the application 
+shinyApp(ui = ui, server = server)
\ No newline at end of file
diff --git a/inst/tutorial/chapter2/chapter2.Rmd b/inst/tutorial/chapter2/chapter2.Rmd
index e840698..cb83292 100644
--- a/inst/tutorial/chapter2/chapter2.Rmd
+++ b/inst/tutorial/chapter2/chapter2.Rmd
@@ -10,8 +10,7 @@ runtime: shiny_prerendered
 ```{r setup, include=FALSE}
 library(learnr)
 knitr::opts_chunk$set(echo = FALSE)
-library(EnvStats, quietly = TRUE, warn.conflicts = FALSE, verbose = FALSE) # For Pareto Distribution
-# library(distr)
+library(EnvStats) # For Pareto Distribution
 x = c(6,2,5,3,5,1,5,7,6,3)
 ```
 

From 1bea9623f3c4bc507ca4e314dbf223e1c07dc3c2 Mon Sep 17 00:00:00 2001
From: Florian Oswald <florian.oswald@gmail.com>
Date: Sat, 14 Nov 2020 22:41:51 +0100
Subject: [PATCH 2/4] improved app

---
 .../bias_variance/app.R                       | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/inst/shinys/bias_variance_tradeoff/bias_variance/app.R b/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
index 99edd2b..9f9ac7a 100644
--- a/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
+++ b/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
@@ -14,6 +14,12 @@ library(gghighlight)
 library(cowplot)
 library(tidyr)
 
+# globals
+STEP = 10
+MAXDF = 60
+EPS = 1
+DFS = c(2:8,seq(10,MAXDF,by = STEP))
+
 getmodels <- function(x,y,newx,dfs = 2:20){
     r = data.frame(x=x,y=y)
     o = data.frame(x=newx)
@@ -33,19 +39,19 @@ getmodels <- function(x,y,newx,dfs = 2:20){
     list(models = s, pred = o, original = r)
 }
 
-data_alldf <- function(fun = function(x) {x*sin(x-2) + 0.2*x},n=90,ub = 5,nnew = 200,maxdf=60){
+data_alldf <- function(fun = function(x) {x*sin(x-2) + 0.2*x},n=90,ub = 5,nnew = 200){
     set.seed(1234)
-    eps = 1 # hard code variance
+    # eps = 1 # hard code variance
     
     r = data.frame(x = seq(0,ub,length.out = n))
     r$truth = fun(r$x) 
-    r$epsi = rnorm(n,mean = 0, sd = eps)
+    r$epsi = rnorm(n,mean = 0, sd = EPS)
     r$y = r$truth + r$epsi
     
-    mods = getmodels(r$x,r$y,seq(0,ub, length.out = nnew),dfs = seq(2,maxdf,by = 2))
+    mods = getmodels(r$x,r$y,seq(0,ub, length.out = nnew),dfs = DFS)
     # add test data to predictions
     mods$pred$truth = fun(mods$pred$x)
-    mods$pred$testdata = mods$pred$truth + rnorm(nnew,mean = 0, sd = eps)
+    mods$pred$testdata = mods$pred$truth + rnorm(nnew,mean = 0, sd = EPS)
     mods$plotdata = mods$pred %>% 
         select(-testdata) %>%
         tidyr::pivot_longer(-x,names_to = "flexibility")
@@ -61,7 +67,7 @@ data_alldf <- function(fun = function(x) {x*sin(x-2) + 0.2*x},n=90,ub = 5,nnew =
     mses$var <- diag(var(mods$pred[,names(mods$models)]))
     
     mses$plotdata <- data.frame(mses)
-    mses$plotdata$flexibility = seq(2,maxdf,by = 2)
+    mses$plotdata$flexibility = DFS
     mses$plotdata = mses$plotdata %>% tidyr::pivot_longer(-flexibility, names_to = "variable")
     
     
@@ -78,11 +84,12 @@ plot_singledf <- function(x,df){
     p = ggplot(d %>% filter(flexibility != "truth"))
     p = p + geom_line(aes(x = x, y = value, color = flexibility), size = 1) + gghighlight(flexibility == paste0("df",df))
     p = p + geom_line(data = d %>% filter(flexibility == "truth"), aes(x=x,y = value), color = "black", size = 1)
-    p = p + geom_point(data = x$models$original, aes(x,y), shape = 1,size = 2) + theme_bw()
+    p = p + geom_point(data = x$models$original, aes(x,y), shape = 1,size = 2) + theme_bw() + labs(title = "Fitting Data on f: f(x) + e", caption = "Solid black line is true f. Circles are realizations of f(X) + e: the data.") + ylab("Estimate of f")
     
     b = ggplot(m, aes(x=flexibility,y = value, color = variable)) + 
-        geom_point() + gghighlight(flexibility == df)
-    b = b + geom_hline(yintercept = 1, linetype = "dashed", color = "grey")+ theme_bw()
+        geom_point(size = 3, shape = 15) + gghighlight(flexibility == df)
+    b = b + geom_line(data = m, aes(x=flexibility, y = value, group = variable), color = "grey")  + ylab("MSE") + xlab("degree of flexibility (degrees of freedom)")
+    b = b + geom_hline(yintercept = 1, linetype = "dashed", color = "grey")+ theme_bw() + labs(title = "Mean Squared Errors")
     cowplot::plot_grid(p,b)
 }
 
@@ -96,12 +103,13 @@ ui <- fluidPage(
     # Sidebar with a slider input for number of bins 
     sidebarLayout(
         sidebarPanel(
-            sliderInput("df",
+            shinyWidgets::sliderTextInput("df",
                         "Degrees of Freedom:",
-                        min = 2,
-                        max = 60,
-                        step = 2,
-                        value = 2)
+                        choices = DFS)
+                        # min = 2,
+                        # max = MAXDF,
+                        # step = 1,
+                        # value = 2)
         ),
 
         # Show a plot of the generated distribution

From 8d40404358adae4bbd82be7dba7b43968ccc960e Mon Sep 17 00:00:00 2001
From: Florian Oswald <florian.oswald@gmail.com>
Date: Sat, 14 Nov 2020 22:42:47 +0100
Subject: [PATCH 3/4] missed a package

---
 inst/shinys/bias_variance_tradeoff/bias_variance/app.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/inst/shinys/bias_variance_tradeoff/bias_variance/app.R b/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
index 9f9ac7a..b04ee4e 100644
--- a/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
+++ b/inst/shinys/bias_variance_tradeoff/bias_variance/app.R
@@ -8,6 +8,7 @@
 #
 
 library(shiny)
+library(shinyWidgets)
 library(ggplot2)
 library(dplyr)
 library(gghighlight)

From 525804628b90387655033c8a78966021f0225831 Mon Sep 17 00:00:00 2001
From: Florian Oswald <florian.oswald@gmail.com>
Date: Tue, 24 Nov 2020 17:47:06 +0100
Subject: [PATCH 4/4] moved

---
 inst/shinys/bias_variance_tradeoff/{bias_variance => }/app.R | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename inst/shinys/bias_variance_tradeoff/{bias_variance => }/app.R (100%)

diff --git a/inst/shinys/bias_variance_tradeoff/bias_variance/app.R b/inst/shinys/bias_variance_tradeoff/app.R
similarity index 100%
rename from inst/shinys/bias_variance_tradeoff/bias_variance/app.R
rename to inst/shinys/bias_variance_tradeoff/app.R