knitr::opts_chunk$set(echo = TRUE)
library('vtreat')
library('WVPlots') # see: https://github.com/WinVector/WVPlots
## Loading required package: ggplot2
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: reshape2
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: plyr
## Loading required package: stringr
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-12. For overview type 'help("mgcv-package")'.
# See installH2O.R for how to install h2o
# From: http://learn.h2o.ai/content/tutorials/deeplearning/
# See also: http://www.r-bloggers.com/things-to-try-after-user-part-1-deep-learning-with-h2o/
library('h2o')
## Loading required package: statmod
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
h2o.init(nthreads=-1, max_mem_size="2G")
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## /var/folders/7q/h_jp2vj131g5799gfnpzhdp80000gn/T//RtmpBXDypq/h2o_johnmount_started_from_r.out
## /var/folders/7q/h_jp2vj131g5799gfnpzhdp80000gn/T//RtmpBXDypq/h2o_johnmount_started_from_r.err
##
##
## Starting H2O JVM and connecting: .. Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 114 milliseconds
## H2O cluster version: 3.8.3.4
## H2O cluster name: H2O_started_from_R_johnmount_rnv147
## H2O cluster total nodes: 1
## H2O cluster total memory: 1.78 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.3.1 (2016-06-21)
h2o.removeAll() ## clean slate - just in case the cluster was already running
## [1] 0
ncore <- parallel::detectCores()
cl <- parallel::makeCluster(ncore)
# see: https://github.com/WinVector/PreparingDataWorkshop/tree/master/KDD2009
d = read.table('orange_small_train.data.gz',
               header=TRUE, sep='\t',
               na.strings=c('NA',''),
               strip.white=TRUE,
               stringsAsFactors=FALSE)
churn = read.table('orange_small_train_churn.labels.txt',
                   header=FALSE, sep='\t',
                   strip.white=TRUE,
                   stringsAsFactors=FALSE)
d$churn = churn$V1
set.seed(729375)
rgroup = runif(dim(d)[[1]])
dTrain = d[rgroup<=0.9,] # set for building models
dTest = d[rgroup>0.9,] # set for evaluation
rm(list=c('d','churn'))
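# Quick size check (a sketch, not in the original run): the split should be
# roughly 90/10 of the ~50,000 KDD2009 rows.
dim(dTrain); dim(dTest)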
outcomes = c('churn','appetency','upselling')
nonvars <- c(outcomes,'rgroup')
vars = setdiff(colnames(dTrain), nonvars)
yName = 'churn'
yTarget = 1
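# Base-rate check (a sketch, not in the original run): churn is rare here,
# so ranking metrics such as AUC are more informative than raw accuracy.
# KDD2009 codes churn as -1/1, so this is the fraction of positive examples.
mean(dTrain[[yName]] == yTarget)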
# build data treatments
set.seed(239525)
# build treatments
trainPlan = mkCrossFrameCExperiment(dTrain,
                                    vars, yName, yTarget,
                                    smFactor=2.0,
                                    parallelCluster=cl)
print(trainPlan$method)
## [1] "kwaycrossystratified"
treatmentsC = trainPlan$treatments
treatedTrainM = trainPlan$crossFrame
#kddSig = 1/nrow(treatmentsC$scoreFrame)
selvars <- setdiff(colnames(treatedTrainM),outcomes)
treatedTrainM[[yName]] = treatedTrainM[[yName]]==yTarget
treatedTest = prepare(treatmentsC,
                      dTest,
                      varRestriction=selvars,
                      pruneSig=NULL,
                      parallelCluster=cl)
treatedTest[[yName]] = treatedTest[[yName]]==yTarget
# hold out roughly 10% of the treated training data as an h2o validation frame
# (a simple default; a production model would require a wider hyperparameter search)
vrsel <- runif(nrow(treatedTrainM)) <= 0.1
trainSet <- as.h2o(treatedTrainM[!vrsel,])
valSet <- as.h2o(treatedTrainM[vrsel,])
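# vtreat's scoreFrame records a significance estimate for each derived
# variable; the filter below keeps variables whose significance beats a
# 1/nVariables threshold. A quick look at the columns involved
# (a sketch, not in the original run):
head(treatmentsC$scoreFrame[, c('varName', 'sig')])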
# keep only variables whose vtreat significance beats a 1/nVariables threshold
goodvars <- treatmentsC$scoreFrame$varName[
  treatmentsC$scoreFrame$sig < 1/nrow(treatmentsC$scoreFrame)]
hyper_params <- list(
  hidden=list(c(32,32,32), c(64,64)),
  input_dropout_ratio=c(0, 0.05),
  rate=c(0.01, 0.02),
  rate_annealing=c(1e-8, 1e-7, 1e-6)
)
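# The grid is the cross product of the hyper-parameter lists:
# 2 x 2 x 2 x 3 = 24 candidate models (a quick check, not in the original run):
prod(lengths(hyper_params))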
print(date())
## [1] "Mon Jul 18 15:06:17 2016"
g <- h2o.grid(
  algorithm="deeplearning",
  grid_id="dl_grid",
  training_frame=trainSet,
  validation_frame=valSet,
  x=goodvars,
  y=yName,
  epochs=100,
  stopping_metric="misclassification",
  stopping_tolerance=1e-2,        ## stop when misclassification does not improve by >=1% for 20 scoring events
  stopping_rounds=20,
  score_validation_samples=10000, ## downsample validation set for faster scoring
  score_duty_cycle=0.025,         ## don't score more than 2.5% of the wall time
  adaptive_rate=FALSE,            ## manually tuned learning rate
  momentum_start=0.5,             ## manually tuned momentum
  momentum_stable=0.9,
  momentum_ramp=1e7,
  l1=1e-5,
  l2=1e-5,
  activation=c("Rectifier"),
  max_w2=10,                      ## can help improve stability for Rectifier
  hyper_params=hyper_params
)
print(date())
## [1] "Mon Jul 18 15:51:24 2016"
print(g@summary_table[1,])
## Hyper-Parameter Search Summary: ordered by increasing logloss
## hidden input_dropout_ratio rate rate_annealing model_ids
## 1 [I@321a9955 0.05 0.01 1.0E-6 dl_grid_model_18
## logloss
## 1 0.25280926530079145
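# g@model_ids is ordered by the search metric (logloss here), so the first id
# is the best model. An equivalent explicit re-sort (a sketch, not part of the
# original run; assumes h2o.getGrid's sort_by argument is available in this
# h2o version):
sortedGrid <- h2o.getGrid("dl_grid", sort_by="logloss", decreasing=FALSE)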
m <- h2o.getModel(g@model_ids[[1]])
summary(m)
## Model Details:
## ==============
##
## H2OBinomialModel: deeplearning
## Model Key: dl_grid_model_18
## Status of Neuron Layers: predicting churn, 2-class classification, bernoulli distribution, CrossEntropy loss, 10,178 weights/biases, 139.0 KB, 4,042,200 training samples, mini-batch size 1
## layer units type dropout l1 l2 mean_rate rate_RMS
## 1 1 249 Input 5.00 %
## 2 2 32 Rectifier 0.00 % 0.000010 0.000010 0.001983 0.000000
## 3 3 32 Rectifier 0.00 % 0.000010 0.000010 0.001983 0.000000
## 4 4 32 Rectifier 0.00 % 0.000010 0.000010 0.001983 0.000000
## 5 5 2 Softmax 0.000010 0.000010 0.001983 0.000000
## momentum mean_weight weight_RMS mean_bias bias_RMS
## 1
## 2 0.661688 -0.001950 0.097793 0.398993 0.077247
## 3 0.661688 -0.044393 0.187270 0.918026 0.043695
## 4 0.661688 -0.062796 0.183808 0.884139 0.051241
## 5 0.661688 0.020776 0.329298 0.008407 0.925762
##
## H2OBinomialMetrics: deeplearning
## ** Reported on training data. **
## Description: Metrics reported on temporary training frame with 10153 samples
##
## MSE: 0.06425998
## R^2: 0.06529115
## LogLoss: 0.2360321
## Mean Per-Class Error: 0.331089
## AUC: 0.7583378
## Gini: 0.5166755
##
## Confusion Matrix for F1-optimal threshold:
## FALSE TRUE Error Rate
## FALSE 8124 1275 0.135653 =1275/9399
## TRUE 397 357 0.526525 =397/754
## Totals 8521 1632 0.164680 =1672/10153
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.132762 0.299246 129
## 2 max f2 0.110446 0.410013 170
## 3 max f0point5 0.175470 0.306220 63
## 4 max accuracy 0.260613 0.926032 7
## 5 max precision 0.270213 0.666667 5
## 6 max recall 0.013713 1.000000 378
## 7 max specificity 0.289274 0.999894 0
## 8 max absolute_MCC 0.132762 0.241162 129
## 9 max min_per_class_accuracy 0.089669 0.684350 207
## 10 max mean_per_class_accuracy 0.102372 0.693200 184
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: deeplearning
## ** Reported on validation data. **
## Description: Metrics reported on full validation frame
##
## MSE: 0.06810449
## R^2: 0.0425764
## LogLoss: 0.2528093
## Mean Per-Class Error: 0.385003
## AUC: 0.7011861
## Gini: 0.4023723
##
## Confusion Matrix for F1-optimal threshold:
## FALSE TRUE Error Rate
## FALSE 3672 579 0.136203 =579/4251
## TRUE 225 130 0.633803 =225/355
## Totals 3897 709 0.174555 =804/4606
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.132774 0.244361 115
## 2 max f2 0.065734 0.371461 244
## 3 max f0point5 0.189017 0.244444 36
## 4 max accuracy 0.293939 0.923144 0
## 5 max precision 0.293939 1.000000 0
## 6 max recall 0.010462 1.000000 382
## 7 max specificity 0.293939 1.000000 0
## 8 max absolute_MCC 0.132774 0.169976 115
## 9 max min_per_class_accuracy 0.079685 0.645070 212
## 10 max mean_per_class_accuracy 0.075585 0.652419 221
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
##
##
## Scoring History:
## timestamp duration training_speed epochs
## 1 2016-07-18 15:39:17 0.000 sec 0.00000
## 2 2016-07-18 15:39:18 33 min 1.091 sec 33882 rows/sec 1.00000
## 3 2016-07-18 15:39:25 33 min 7.785 sec 36566 rows/sec 7.00000
## 4 2016-07-18 15:39:32 33 min 14.431 sec 36899 rows/sec 13.00000
## 5 2016-07-18 15:39:38 33 min 20.716 sec 37677 rows/sec 19.00000
## 6 2016-07-18 15:39:45 33 min 27.325 sec 37621 rows/sec 25.00000
## 7 2016-07-18 15:39:50 33 min 33.042 sec 38628 rows/sec 31.00000
## 8 2016-07-18 15:39:56 33 min 38.794 sec 39298 rows/sec 37.00000
## 9 2016-07-18 15:40:02 33 min 44.310 sec 40021 rows/sec 43.00000
## 10 2016-07-18 15:40:08 33 min 51.110 sec 40357 rows/sec 50.00000
## 11 2016-07-18 15:40:15 33 min 57.342 sec 41011 rows/sec 57.00000
## 12 2016-07-18 15:40:20 34 min 2.719 sec 41457 rows/sec 63.00000
## 13 2016-07-18 15:40:25 34 min 8.092 sec 41836 rows/sec 69.00000
## 14 2016-07-18 15:40:32 34 min 14.279 sec 42245 rows/sec 76.00000
## 15 2016-07-18 15:40:38 34 min 21.038 sec 41798 rows/sec 82.00000
## 16 2016-07-18 15:40:46 34 min 28.786 sec 41856 rows/sec 90.00000
## 17 2016-07-18 15:40:52 34 min 34.624 sec 41902 rows/sec 96.00000
## 18 2016-07-18 15:40:56 34 min 38.299 sec 42040 rows/sec 100.00000
## 19 2016-07-18 15:40:56 34 min 38.444 sec 42039 rows/sec 100.00000
## iterations samples training_MSE training_r2 training_logloss
## 1 0 0.000000
## 2 1 40422.000000 0.06569 0.04445 0.24766
## 3 7 282954.000000 0.06426 0.06529 0.23603
## 4 13 525486.000000 0.06278 0.08678 0.22795
## 5 19 768018.000000 0.06093 0.11372 0.21681
## 6 25 1010550.000000 0.06080 0.11562 0.21528
## 7 31 1253082.000000 0.05794 0.15719 0.20214
## 8 37 1495614.000000 0.05576 0.18896 0.19557
## 9 43 1738146.000000 0.05494 0.20085 0.19170
## 10 50 2021100.000000 0.05210 0.24214 0.18246
## 11 57 2304054.000000 0.05126 0.25436 0.17987
## 12 63 2546586.000000 0.05053 0.26498 0.17471
## 13 69 2789118.000000 0.04988 0.27453 0.17072
## 14 76 3072072.000000 0.04858 0.29336 0.16829
## 15 82 3314604.000000 0.04906 0.28638 0.16638
## 16 90 3637980.000000 0.04683 0.31883 0.16086
## 17 96 3880512.000000 0.04612 0.32918 0.15929
## 18 100 4042200.000000 0.04503 0.34498 0.15527
## 19 100 4042200.000000 0.06426 0.06529 0.23603
## training_AUC training_lift training_classification_error validation_MSE
## 1
## 2 0.70834 4.22448 0.15710 0.06822
## 3 0.75834 5.41261 0.16468 0.06810
## 4 0.78294 7.26082 0.11327 0.06860
## 5 0.81457 7.39283 0.12381 0.06951
## 6 0.82018 7.26082 0.10273 0.06966
## 7 0.85132 8.84500 0.09101 0.07011
## 8 0.86176 10.16515 0.09830 0.07130
## 9 0.87139 10.16515 0.08677 0.07085
## 10 0.88441 10.16515 0.07978 0.07260
## 11 0.89192 10.95723 0.06471 0.07262
## 12 0.89880 11.74932 0.06796 0.07305
## 13 0.90783 11.74932 0.06776 0.07490
## 14 0.90636 11.88134 0.06826 0.07593
## 15 0.91538 11.35328 0.06786 0.07455
## 16 0.91733 12.27738 0.06757 0.07611
## 17 0.91944 12.40940 0.06284 0.07798
## 18 0.92272 12.40940 0.06215 0.07794
## 19 0.75834 5.41261 0.16468 0.06810
## validation_r2 validation_logloss validation_AUC validation_lift
## 1
## 2 0.04089 0.25638 0.68741 6.07324
## 3 0.04258 0.25281 0.70119 6.07324
## 4 0.03558 0.25903 0.68377 4.69296
## 5 0.02277 0.27108 0.67638 4.41690
## 6 0.02074 0.27229 0.67084 3.31268
## 7 0.01441 0.29167 0.67970 3.58873
## 8 0.29029 0.66654 3.03662
## 9 0.00398 0.28776 0.67610 3.31268
## 10 0.29726 0.67496 3.86479
## 11 0.29745 0.67289 2.76056
## 12 0.31515 0.67075 3.31268
## 13 0.34036 0.65333 3.03662
## 14 0.33879 0.65439 3.58873
## 15 0.33393 0.65362 4.14085
## 16 0.36217 0.65190 3.03662
## 17 0.35514 0.64155 2.48451
## 18 0.36718 0.65519 3.03662
## 19 0.04258 0.25281 0.70119 6.07324
## validation_classification_error
## 1
## 2 0.18541
## 3 0.17455
## 4 0.18498
## 5 0.17716
## 6 0.21038
## 7 0.19062
## 8 0.22384
## 9 0.24620
## 10 0.23990
## 11 0.23838
## 12 0.24229
## 13 0.23860
## 14 0.21906
## 15 0.19496
## 16 0.19019
## 17 0.24989
## 18 0.28702
## 19 0.17455
plot(m)  # plot the selected model's scoring history
pTrain <- predict(m,newdata=as.h2o(treatedTrainM))
# extract the predicted probability of the positive class
treatedTrainM$pred <- as.data.frame(pTrain[,'TRUE'])[[1]]
WVPlots::ROCPlot(treatedTrainM,'pred',yName,'prediction on train')
pTest <- predict(m,newdata=as.h2o(treatedTest))
treatedTest$pred <- as.data.frame(pTest[,'TRUE'])[[1]]
WVPlots::ROCPlot(treatedTest,'pred',yName,'prediction on test')
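# Cross-check of the test ROC plot using h2o's own metrics (a sketch, not in
# the original run; must happen before the cluster is shut down):
perfTest <- h2o.performance(m, newdata=as.h2o(treatedTest))
print(h2o.auc(perfTest))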
h2o.shutdown(prompt=FALSE)
## [1] TRUE
if(!is.null(cl)) {
  parallel::stopCluster(cl)
  cl = NULL
}