For access to more scripts, see the repo here: https://github.com/blakeshurtz/multilevel_marketing

## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For improved execution time, we recommend calling
## Sys.setenv(LOCAL_CPPFLAGS = '-march=native')
## although this causes Stan to throw an error on a few processors.
## Loading required package: Rcpp
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## rstanarm (Version 2.19.2, packaged: 2019-10-01 20:20:33 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
## 
##     loo
## This is loo version 2.2.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## - Windows 10 users: loo may be very slow if 'mc.cores' is set in your .Rprofile file (see https://github.com/stan-dev/loo/issues/94).
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rstan':
## 
##     loo
## -- Attaching packages --- tidyverse 1.3.0 --
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## v purrr   0.3.3
## -- Conflicts ------ tidyverse_conflicts() --
## x tidyr::extract() masks rstan::extract()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## This is bayesplot version 1.7.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## Loading required package: shiny
## 
## This is shinystan version 2.5.0

0.0.1 Simulating Data

###simulating data
set.seed(849107) #for reproducability

###define multi-level vars
ng_marketing <- 5 #marketing sources
nj_marketing <- 300 #marketing cluster size (make unbalanced) 
n = ng_marketing * nj_marketing

ng_year <- 5 #years of data
nj_year <- 300 #cluster size (make unbalanced) 
n = ng_year * nj_year

ng_city <- 5 #citys of data
nj_city <- 300 #cluster size (make unbalanced) 
n = ng_city * nj_city

ng_salesperson <- 5 #citys of data
nj_salesperson <- 300 #cluster size (make unbalanced) 
n = ng_salesperson * nj_salesperson

###creating cluster variables, sample to randomise order

#marketing
lead_source <- gl(ng_marketing, k = nj_marketing,
            labels = c("radio", "tv", "newspaper", 
                       "internet", "vehicle"
                       )
            )

lead_source <- sample(lead_source, n, replace = FALSE)

#year
year <- gl(ng_year, k = nj_year,
                  labels = c("2015", "2016", "2017", 
                             "2018", "2019"
                  )
)

year <- sample(year, n, replace = FALSE)

#city
city <- gl(ng_city, k = nj_city,
                  labels = c("davis", "vacaville", "fairfield", 
                             "woodland", "suisun"
                  )
)

city <- sample(city, n, replace = FALSE)

#sales_professional
sales_professional <- gl(ng_salesperson, k = nj_salesperson,
             labels = c("John", "Susan", "Peter", 
                        "Steven", "Mary"
             )
)

sales_professional <- sample(sales_professional, n, replace = FALSE)

###standard (single level) variables
#product
product <- factor(c(rep("HVAC", 750),
                         rep("DHW", 500),
                         rep("SOLAR", 250))
)


product <- sample(product, n, replace = FALSE)

#n_service
n_service <- scale(rpois(1500, 2))

#prior_revenue
prior_revenue <- c(rep(0, 1000), rep(1, 500))

prior_revenue <- sample(prior_revenue, n, replace = FALSE)

#call_to_estimate
call_to_estimate <- c(scale(rpois(1500, 2)))

#creating table showing factor label and numeric value
marketing_table = unique(data.frame(bind_cols(name = lead_source,
                                               value= as.numeric(lead_source)-3
                                          )
                                )
                    )

year_table = unique(data.frame(bind_cols(name = year,
                                          value= as.numeric(year)-3
                                         )
                                )
                    )

city_table = unique(data.frame(bind_cols(name = city,
                                              value= as.numeric(city)-3
                                        )
                              )
                    )

salesperson_table = unique(data.frame(bind_cols(name = sales_professional,
                                         value= as.numeric(sales_professional)-3
                                        )
                              )
                    )


product_table = unique(data.frame(bind_cols(name = product,
                                                value= as.numeric(product)-2
                                                )
                                      )
                            )

#sd- in estimating coefficients, sigma has inverse relationship with n
sigma = .01

#simulate regression
close <- as.numeric(lead_source) - 3 + 
         as.numeric(year) - 3 + 
         as.numeric(city) - 3 +
         as.numeric(sales_professional) - 3 +
         as.numeric(product) - 2 +
         n_service +
         prior_revenue +
         call_to_estimate +
         rnorm(n, 0, sigma)

###convert to logistic
pr = 1/(1+exp(-close)) # pass through an inv-logit function
range(pr)
## [1] 2.843567e-05 9.999784e-01
## close
##   0   1 
## 724 776

0.0.5 Bayesplot

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## $lead_source
##           (Intercept)
## radio     -1.89454310
## tv        -1.09452622
## newspaper  0.05502839
## internet   0.80787828
## vehicle    1.96022543
## 
## $year
##      (Intercept)
## 2015  -1.9540767
## 2016  -1.0766000
## 2017  -0.2532088
## 2018   1.1979384
## 2019   1.8545443
## 
## $city
##           (Intercept)
## davis      -1.8081953
## vacaville  -1.2131092
## fairfield  -0.3171668
## woodland    1.0816224
## suisun      2.1825517
## 
## $sales_professional
##        (Intercept)
## John   -2.32908962
## Susan  -0.85040569
## Peter  -0.02412893
## Steven  0.98825503
## Mary    2.04118571
## 
## with conditional variances for "lead_source" "year" "city" "sales_professional"