KDD2009 example. The contest winners achieved a hold-out AUC of 0.7611 on the churn task. See here for more details.

#load some libraries
# http://www.win-vector.com/blog/2014/08/vtreat-designing-a-package-for-variable-treatment/
library('vtreat')
# devtools::install_github("WinVector/WVPlots")
library('WVPlots')
## Loading required package: ggplot2
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: reshape2
## Loading required package: ROCR
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: plyr
## Loading required package: stringr
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-14. For overview type 'help("mgcv-package")'.
library('parallel')
library('gbm')
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loaded gbm 2.1.1
#library('class')
library('ggplot2')
library('glmnet')
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
# Load the data as in the book.
# Change this path to match your directory structure.
dir <- '~/Documents/work/PracticalDataScienceWithR/zmPDSwR/KDD2009/'

# Feature table: tab-separated with a header row. Both the literal 'NA' and
# empty fields are read as missing, and text columns are kept as character.
d <- read.table(paste0(dir, 'orange_small_train.data.gz'),
                header = TRUE, sep = '\t', na.strings = c('NA', ''),
                stringsAsFactors = FALSE)

# Each label file is read without a header; its first column (V1) is attached
# to d as the outcome column of the same name.
churn <- read.table(paste0(dir, 'orange_small_train_churn.labels.txt'),
                    header = FALSE, sep = '\t')
d$churn <- churn$V1
appetency <- read.table(paste0(dir, 'orange_small_train_appetency.labels.txt'),
                        header = FALSE, sep = '\t')
d$appetency <- appetency$V1
upselling <- read.table(paste0(dir, 'orange_small_train_upselling.labels.txt'),
                        header = FALSE, sep = '\t')
d$upselling <- upselling$V1
# Reproducible random split: every row gets a uniform draw in rgroup; rows at
# or below 0.9 are used for building models, the remainder for evaluation.
set.seed(729375)
d$rgroup <- runif(nrow(d))
dTrain <- d[d$rgroup <= 0.9, , drop = FALSE]  # set for building models
dTest <- d[d$rgroup > 0.9, , drop = FALSE]    # set for evaluation
# Debug switch: when TRUE, both frames are cut down to 100 randomly chosen
# rows so the remainder of the script runs quickly.
debug = FALSE
if(debug) {
  # row indices are drawn from dTrain's own row count
  dTrain <- dTrain[sample.int(nrow(dTrain),100),]
  dTest <- dTest[sample.int(nrow(dTest),100),]
}
# The raw inputs are no longer needed once the train/test frames exist.
rm(d, churn, appetency, upselling, dir)

# Outcome to model and the value of it that counts as the positive class.
yName <- 'churn'
yTarget <- 1

# Candidate input columns: everything except the three outcome columns and
# the random split key.
outcomes <- c('churn', 'appetency', 'upselling')
nonvars <- c(outcomes, 'rgroup')
vars <- setdiff(colnames(dTrain), nonvars)
# build data treatments

set.seed(239525)

# Parallel cluster handed to vtreat; stays NULL (no cluster) in debug mode.
cl <- NULL
if(!debug) {
  ncore <- parallel::detectCores()
  # detectCores() returns NA when the core count cannot be determined;
  # fall back to a single worker rather than failing inside makeCluster().
  if(is.na(ncore)) {
    ncore <- 1L
  }
  cl <- parallel::makeCluster(ncore)
}

# build treatments 
# Significance cutoff used below to select derived variables.
kddSig = 0.05

# Design the treatment plan for the binary outcome yName == yTarget.
# The result carries both the plan ($treatments) and the treated training
# frame ($crossFrame).
trainPlan = mkCrossFrameCExperiment(dTrain,
    vars,yName,yTarget,
    smFactor=2.0, rareCount = 2, rareSig = 0.5,
    parallelCluster=cl)
treatmentsC = trainPlan$treatments
treatedTrainM = trainPlan$crossFrame

# Keep only the derived variables whose significance is below the cutoff.
selvars = treatmentsC$scoreFrame$varName[treatmentsC$scoreFrame$sig<kddSig]

print(treatmentsC$scoreFrame[,c('varName','sig')])
##                                    varName          sig
## 1                               Var1_clean 9.492776e-01
## 2                               Var1_isBAD 4.529984e-01
## 3                               Var2_clean 8.605947e-01
## 4                               Var2_isBAD 4.708216e-06
## 5                               Var3_clean 7.676960e-01
## 6                               Var3_isBAD 4.885833e-06
## 7                               Var4_clean 4.660491e-01
## 8                               Var4_isBAD 1.862012e-06
## 9                               Var5_clean 8.234268e-01
## 10                              Var5_isBAD 1.419659e-06
## 11                              Var6_clean 6.820836e-05
## 12                              Var6_isBAD 1.822398e-15
## 13                              Var7_clean 2.243008e-50
## 14                              Var7_isBAD 4.938685e-12
## 15                              Var9_clean 7.172173e-01
## 16                              Var9_isBAD 4.529984e-01
## 17                             Var10_clean 4.154370e-01
## 18                             Var10_isBAD 1.419659e-06
## 19                             Var11_clean 9.166634e-01
## 20                             Var11_isBAD 4.885833e-06
## 21                             Var12_clean 3.775127e-01
## 22                             Var12_isBAD 7.958694e-01
## 23                             Var13_clean 3.243381e-29
## 24                             Var13_isBAD 4.938685e-12
## 25                             Var14_clean 2.849611e-01
## 26                             Var14_isBAD 4.885833e-06
## 27                             Var16_clean 6.766542e-01
## 28                             Var16_isBAD 1.419659e-06
## 29                             Var17_clean 8.319031e-01
## 30                             Var17_isBAD 1.862012e-06
## 31                             Var18_clean 8.868140e-01
## 32                             Var18_isBAD 1.862012e-06
## 33                             Var19_clean 2.251994e-01
## 34                             Var19_isBAD 1.862012e-06
## 35                             Var21_clean 8.586691e-02
## 36                             Var21_isBAD 1.822398e-15
## 37                             Var22_clean 9.137265e-02
## 38                             Var22_isBAD 6.650215e-16
## 39                             Var23_clean 7.450751e-01
## 40                             Var23_isBAD 1.419659e-06
## 41                             Var24_clean 1.751853e-02
## 42                             Var24_isBAD 3.283203e-02
## 43                             Var25_clean 1.871793e-02
## 44                             Var25_isBAD 6.650215e-16
## 45                             Var26_clean 8.131591e-01
## 46                             Var26_isBAD 1.419659e-06
## 47                             Var27_clean 4.453701e-01
## 48                             Var27_isBAD 1.419659e-06
## 49                             Var28_clean 2.576883e-03
## 50                             Var28_isBAD 6.216094e-16
## 51                             Var29_clean 3.997658e-01
## 52                             Var29_isBAD 4.529984e-01
## 53                             Var30_clean 8.526755e-01
## 54                             Var30_isBAD 4.529984e-01
## 55                             Var33_clean 8.218358e-01
## 56                             Var33_isBAD 1.069230e-04
## 57                             Var34_clean 2.481331e-01
## 58                             Var34_isBAD 4.708216e-06
## 59                             Var35_clean 6.384715e-02
## 60                             Var35_isBAD 6.650215e-16
## 61                             Var36_clean 8.308712e-01
## 62                             Var36_isBAD 4.708216e-06
## 63                             Var37_clean 7.051880e-01
## 64                             Var37_isBAD 1.862012e-06
## 65                             Var38_clean 1.377868e-01
## 66                             Var38_isBAD 6.650215e-16
## 67                             Var40_clean 6.865043e-01
## 68                             Var40_isBAD 4.708216e-06
## 69                             Var41_clean 8.896375e-01
## 70                             Var41_isBAD 4.529984e-01
## 71                             Var43_clean 9.368772e-01
## 72                             Var43_isBAD 4.708216e-06
## 73                             Var44_clean 6.515621e-01
## 74                             Var44_isBAD 6.650215e-16
## 75                             Var45_clean 3.862460e-01
## 76                             Var45_isBAD 2.037797e-01
## 77                             Var46_clean 6.375689e-01
## 78                             Var46_isBAD 4.708216e-06
## 79                             Var47_clean 6.770957e-01
## 80                             Var47_isBAD 4.529984e-01
## 81                             Var49_clean 5.199929e-01
## 82                             Var49_isBAD 4.708216e-06
## 83                             Var50_clean 5.624658e-01
## 84                             Var50_isBAD 4.529984e-01
## 85                             Var51_clean 1.451012e-01
## 86                             Var51_isBAD 5.895954e-05
## 87                             Var53_clean 1.419659e-01
## 88                             Var53_isBAD 4.529984e-01
## 89                             Var54_clean 8.323607e-01
## 90                             Var54_isBAD 4.708216e-06
## 91                             Var56_clean 8.017284e-01
## 92                             Var56_isBAD 4.294514e-02
## 93                             Var57_clean 1.196233e-01
## 94                             Var58_clean 9.500667e-01
## 95                             Var58_isBAD 4.529984e-01
## 96                             Var59_clean 4.834119e-01
## 97                             Var59_isBAD 2.014521e-02
## 98                             Var60_clean 8.494907e-01
## 99                             Var60_isBAD 1.419659e-06
## 100                            Var61_clean 9.396293e-01
## 101                            Var61_isBAD 1.069230e-04
## 102                            Var62_clean 5.246557e-01
## 103                            Var62_isBAD 7.958694e-01
## 104                            Var63_clean 7.695258e-01
## 105                            Var63_isBAD 5.016146e-01
## 106                            Var64_clean 9.402750e-01
## 107                            Var64_isBAD 1.933946e-01
## 108                            Var65_clean 4.650316e-17
## 109                            Var65_isBAD 4.938685e-12
## 110                            Var66_clean 7.232170e-01
## 111                            Var66_isBAD 5.016146e-01
## 112                            Var67_clean 6.842271e-01
## 113                            Var67_isBAD 1.419659e-06
## 114                            Var68_clean 7.448982e-01
## 115                            Var68_isBAD 4.708216e-06
## 116                            Var69_clean 3.134760e-01
## 117                            Var69_isBAD 1.419659e-06
## 118                            Var70_clean 7.319862e-01
## 119                            Var70_isBAD 1.419659e-06
## 120                            Var71_clean 9.406609e-01
## 121                            Var71_isBAD 8.505976e-06
## 122                            Var72_clean 5.281298e-12
## 123                            Var72_isBAD 1.082957e-01
## 124                            Var73_clean 4.221817e-50
## 125                            Var74_clean 1.142333e-34
## 126                            Var74_isBAD 4.938685e-12
## 127                            Var75_clean 5.766790e-01
## 128                            Var75_isBAD 4.708216e-06
## 129                            Var76_clean 1.258218e-01
## 130                            Var76_isBAD 6.650215e-16
## 131                            Var77_clean 3.869888e-01
## 132                            Var77_isBAD 4.529984e-01
## 133                            Var78_clean 5.539030e-01
## 134                            Var78_isBAD 6.650215e-16
## 135                            Var80_clean 8.666502e-01
## 136                            Var80_isBAD 1.419659e-06
## 137                            Var81_clean 1.954891e-12
## 138                            Var81_isBAD 1.822398e-15
## 139                            Var82_clean 6.062823e-02
## 140                            Var82_isBAD 1.862012e-06
## 141                            Var83_clean 8.666888e-01
## 142                            Var83_isBAD 6.650215e-16
## 143                            Var84_clean 6.994175e-01
## 144                            Var84_isBAD 4.885833e-06
## 145                            Var85_clean 1.604340e-02
## 146                            Var85_isBAD 6.650215e-16
## 147                            Var86_clean 6.330919e-01
## 148                            Var86_isBAD 4.529984e-01
## 149                            Var87_clean 5.909534e-01
## 150                            Var87_isBAD 4.529984e-01
## 151                            Var88_clean 8.070649e-01
## 152                            Var88_isBAD 2.035268e-04
## 153                            Var89_clean 5.913166e-01
## 154                            Var89_isBAD 4.294514e-02
## 155                            Var90_clean 6.519917e-01
## 156                            Var90_isBAD 4.529984e-01
## 157                            Var91_clean 9.406609e-01
## 158                            Var91_isBAD 8.505976e-06
## 159                            Var92_clean 5.102136e-01
## 160                            Var92_isBAD 9.131691e-01
## 161                            Var93_clean 8.799233e-01
## 162                            Var93_isBAD 1.419659e-06
## 163                            Var94_clean 3.040503e-03
## 164                            Var94_isBAD 1.082957e-01
## 165                            Var95_clean 8.740925e-01
## 166                            Var95_isBAD 4.708216e-06
## 167                            Var96_clean 9.791331e-01
## 168                            Var96_isBAD 4.708216e-06
## 169                            Var97_clean 5.104204e-01
## 170                            Var97_isBAD 1.419659e-06
## 171                            Var98_clean 5.196590e-01
## 172                            Var98_isBAD 7.958694e-01
## 173                            Var99_clean 8.074618e-01
## 174                            Var99_isBAD 1.862012e-06
## 175                           Var100_clean 3.622525e-01
## 176                           Var100_isBAD 4.529984e-01
## 177                           Var101_clean 9.132568e-01
## 178                           Var101_isBAD 1.609703e-04
## 179                           Var102_clean 8.995868e-01
## 180                           Var102_isBAD 4.212975e-02
## 181                           Var103_clean 9.833747e-01
## 182                           Var103_isBAD 1.419659e-06
## 183                           Var104_clean 9.008945e-01
## 184                           Var104_isBAD 2.014521e-02
## 185                           Var105_clean 9.008945e-01
## 186                           Var105_isBAD 2.014521e-02
## 187                           Var106_clean 9.832242e-01
## 188                           Var106_isBAD 1.862012e-06
## 189                           Var107_clean 9.052218e-01
## 190                           Var107_isBAD 1.419659e-06
## 191                           Var108_clean 4.496980e-01
## 192                           Var108_isBAD 4.529984e-01
## 193                           Var109_clean 1.821215e-01
## 194                           Var109_isBAD 3.283203e-02
## 195                           Var110_clean 1.182896e-01
## 196                           Var110_isBAD 4.529984e-01
## 197                           Var111_clean 5.100830e-01
## 198                           Var111_isBAD 8.505976e-06
## 199                           Var112_clean 1.461138e-01
## 200                           Var112_isBAD 6.650215e-16
## 201                           Var113_clean 3.397145e-35
## 202                           Var114_clean 1.801976e-01
## 203                           Var114_isBAD 4.708216e-06
## 204                           Var115_clean 5.050064e-01
## 205                           Var115_isBAD 2.014521e-02
## 206                           Var116_clean 8.816889e-01
## 207                           Var116_isBAD 4.529984e-01
## 208                           Var117_clean 5.088379e-01
## 209                           Var117_isBAD 1.862012e-06
## 210                           Var118_isBAD 9.131691e-01
## 211                           Var119_clean 2.755317e-02
## 212                           Var119_isBAD 1.822398e-15
## 213                           Var120_clean 9.554935e-01
## 214                           Var120_isBAD 1.419659e-06
## 215                           Var121_clean 9.468560e-01
## 216                           Var121_isBAD 4.529984e-01
## 217                           Var122_clean 5.330364e-01
## 218                           Var122_isBAD 4.708216e-06
## 219                           Var123_clean 8.512207e-01
## 220                           Var123_isBAD 6.650215e-16
## 221                           Var124_clean 8.426307e-01
## 222                           Var124_isBAD 1.862012e-06
## 223                           Var125_clean 1.240769e-06
## 224                           Var125_isBAD 4.938685e-12
## 225                           Var126_clean 1.016548e-18
## 226                           Var126_isBAD 3.089900e-73
## 227                           Var127_clean 9.526229e-01
## 228                           Var127_isBAD 2.035268e-04
## 229                           Var128_clean 8.070649e-01
## 230                           Var128_isBAD 2.035268e-04
## 231                           Var129_clean 8.423695e-01
## 232                           Var129_isBAD 4.529984e-01
## 233                           Var130_clean 6.175541e-02
## 234                           Var130_isBAD 4.885833e-06
## 235                           Var131_clean 1.474015e-01
## 236                           Var131_isBAD 4.529984e-01
## 237                           Var132_clean 1.146808e-01
## 238                           Var132_isBAD 6.650215e-16
## 239                           Var133_clean 9.205745e-01
## 240                           Var133_isBAD 6.650215e-16
## 241                           Var134_clean 2.869840e-01
## 242                           Var134_isBAD 6.650215e-16
## 243                           Var135_clean 9.381591e-01
## 244                           Var135_isBAD 1.862012e-06
## 245                           Var136_clean 2.068155e-01
## 246                           Var136_isBAD 5.016146e-01
## 247                           Var137_clean 6.449236e-01
## 248                           Var137_isBAD 4.529984e-01
## 249                           Var138_clean 8.520043e-01
## 250                           Var138_isBAD 1.862012e-06
## 251                           Var139_clean 3.492143e-02
## 252                           Var139_isBAD 1.419659e-06
## 253                           Var140_clean 4.744656e-17
## 254                           Var140_isBAD 4.938685e-12
## 255                           Var142_clean 1.548562e-01
## 256                           Var142_isBAD 4.529984e-01
## 257                           Var143_clean 2.680228e-02
## 258                           Var143_isBAD 6.650215e-16
## 259                           Var144_clean 8.900620e-30
## 260                           Var144_isBAD 1.822398e-15
## 261                           Var145_clean 8.188110e-01
## 262                           Var145_isBAD 1.862012e-06
## 263                           Var146_clean 8.568031e-01
## 264                           Var146_isBAD 1.419659e-06
## 265                           Var147_clean 2.938475e-01
## 266                           Var147_isBAD 1.419659e-06
## 267                           Var148_clean 9.401887e-01
## 268                           Var148_isBAD 1.419659e-06
## 269                           Var149_clean 7.314142e-03
## 270                           Var149_isBAD 3.283203e-02
## 271                           Var150_clean 9.022465e-01
## 272                           Var150_isBAD 1.862012e-06
## 273                           Var151_clean 9.380727e-01
## 274                           Var151_isBAD 1.069230e-04
## 275                           Var152_clean 8.730887e-01
## 276                           Var152_isBAD 1.862012e-06
## 277                           Var153_clean 9.003705e-02
## 278                           Var153_isBAD 6.650215e-16
## 279                           Var154_clean 5.678110e-01
## 280                           Var154_isBAD 4.529984e-01
## 281                           Var155_clean 9.893070e-01
## 282                           Var155_isBAD 1.862012e-06
## 283                           Var156_clean 7.232170e-01
## 284                           Var156_isBAD 5.016146e-01
## 285                           Var157_clean 8.228076e-01
## 286                           Var157_isBAD 8.505976e-06
## 287                           Var158_clean 9.410387e-01
## 288                           Var158_isBAD 1.609703e-04
## 289                           Var159_clean 9.823880e-01
## 290                           Var159_isBAD 4.708216e-06
## 291                           Var160_clean 5.977602e-03
## 292                           Var160_isBAD 6.650215e-16
## 293                           Var161_clean 3.160734e-01
## 294                           Var161_isBAD 1.862012e-06
## 295                           Var162_clean 5.769463e-01
## 296                           Var162_isBAD 4.708216e-06
## 297                           Var163_clean 4.873034e-01
## 298                           Var163_isBAD 6.650215e-16
## 299                           Var164_clean 7.113866e-01
## 300                           Var164_isBAD 1.862012e-06
## 301                           Var165_clean 8.730784e-01
## 302                           Var165_isBAD 1.609703e-04
## 303                           Var166_clean 9.690297e-01
## 304                           Var166_isBAD 1.419659e-06
## 305                           Var168_clean 1.250505e-02
## 306                           Var168_isBAD 4.529984e-01
## 307                           Var170_clean 5.107866e-01
## 308                           Var170_isBAD 4.708216e-06
## 309                           Var171_clean 8.303872e-01
## 310                           Var171_isBAD 2.035268e-04
## 311                           Var172_clean 4.570958e-01
## 312                           Var172_isBAD 1.419659e-06
## 313                           Var173_clean 2.412398e-02
## 314                           Var173_isBAD 6.650215e-16
## 315                           Var174_clean 6.481659e-01
## 316                           Var174_isBAD 1.862012e-06
## 317                           Var176_clean 5.014708e-01
## 318                           Var176_isBAD 4.885833e-06
## 319                           Var177_clean 1.286309e-01
## 320                           Var177_isBAD 4.708216e-06
## 321                           Var178_clean 5.505067e-01
## 322                           Var178_isBAD 4.294514e-02
## 323                           Var179_clean 8.971566e-01
## 324                           Var179_isBAD 1.862012e-06
## 325                           Var180_clean 8.457465e-01
## 326                           Var180_isBAD 4.529984e-01
## 327                           Var181_clean 8.072579e-01
## 328                           Var181_isBAD 6.650215e-16
## 329                           Var182_clean 9.562394e-01
## 330                           Var182_isBAD 1.862012e-06
## 331                           Var183_clean 7.838545e-01
## 332                           Var183_isBAD 4.708216e-06
## 333                           Var184_clean 7.155066e-01
## 334                           Var184_isBAD 4.708216e-06
## 335                           Var186_clean 8.510361e-01
## 336                           Var186_isBAD 4.529984e-01
## 337                           Var187_clean 6.159631e-01
## 338                           Var187_isBAD 4.529984e-01
## 339                           Var188_clean 5.453843e-02
## 340                           Var188_isBAD 4.708216e-06
## 341                           Var189_clean 1.291969e-64
## 342                           Var189_isBAD 1.970811e-01
## 343                           Var190_clean 5.455177e-01
## 344                           Var190_isBAD 1.930716e-01
## 345                          Var191_lev_NA 2.035268e-04
## 346                      Var191_lev_x.r__I 2.035268e-04
## 347                            Var192_catP 2.082577e-10
## 348                            Var192_catB 5.533664e-40
## 349                   Var193_lev_x.2Knk1KF 4.037365e-22
## 350                   Var193_lev_x.AERks4l 4.739646e-04
## 351                      Var193_lev_x.RO12 1.876226e-40
## 352                            Var193_catP 1.701853e-39
## 353                            Var193_catB 6.236651e-32
## 354                          Var194_lev_NA 6.161267e-05
## 355                      Var194_lev_x.SEuy 8.513717e-05
## 356                            Var194_catP 6.096128e-05
## 357                            Var194_catB 1.291723e-03
## 358                      Var195_lev_x.taul 5.902705e-06
## 359                            Var195_catP 6.138027e-06
## 360                            Var195_catB 1.929683e-05
## 361                      Var196_lev_x.1K8T 9.389575e-02
## 362                            Var196_catP 9.404524e-02
## 363                            Var196_catB 1.000194e-01
## 364                      Var197_lev_x.0Xwj 1.280985e-02
## 365                      Var197_lev_x.487l 6.156225e-02
## 366                      Var197_lev_x.JLbT 2.873384e-01
## 367                      Var197_lev_x.ssAy 3.997540e-03
## 368                      Var197_lev_x.TyGl 4.804164e-02
## 369                            Var197_catP 1.416153e-06
## 370                            Var197_catB 6.309090e-04
## 371                        Var198_lev_rare 3.059497e-06
## 372                   Var198_lev_x.fhk21Ss 1.586589e-20
## 373                   Var198_lev_x.PHNvXy8 1.178434e-02
## 374                            Var198_catP 9.978122e-23
## 375                            Var198_catB 7.173122e-20
## 376                        Var199_lev_rare 2.388672e-18
## 377                            Var199_catP 1.470377e-36
## 378                            Var199_catB 7.717977e-62
## 379                          Var200_lev_NA 9.625925e-29
## 380                        Var200_lev_rare 3.419952e-22
## 381                            Var200_catP 9.623338e-29
## 382                            Var200_catB 1.718490e-16
## 383                          Var201_lev_NA 6.239981e-05
## 384                      Var201_lev_x.smXZ 5.556270e-05
## 385                            Var201_catP 6.754469e-05
## 386                            Var201_catB 1.024274e-04
## 387                        Var202_lev_rare 1.717677e-01
## 388                            Var202_catP 2.682362e-18
## 389                            Var202_catB 2.582971e-06
## 390                      Var203_lev_x.9_Y1 2.457481e-02
## 391                      Var203_lev_x.F3hy 5.199863e-02
## 392                      Var203_lev_x.HLqf 1.011029e-01
## 393                            Var203_catP 2.512826e-02
## 394                            Var203_catB 5.392982e-01
## 395                      Var204_lev_x.15m3 1.902608e-01
## 396                      Var204_lev_x.m_h1 2.657181e-01
## 397                      Var204_lev_x.RcM7 2.412547e-03
## 398                      Var204_lev_x.rGJy 3.201938e-01
## 399                      Var204_lev_x.RVjC 6.707372e-03
## 400                      Var204_lev_x.SkZj 1.120507e-01
## 401                      Var204_lev_x.z5Ry 3.144715e-03
## 402                            Var204_catP 9.407627e-04
## 403                            Var204_catB 2.171628e-11
## 404                          Var205_lev_NA 3.520832e-01
## 405                      Var205_lev_x.09_Q 2.208737e-03
## 406                   Var205_lev_x.sJzTlal 4.297525e-43
## 407                      Var205_lev_x.VpdQ 4.100066e-11
## 408                            Var205_catP 2.599738e-17
## 409                            Var205_catB 3.967139e-40
## 410                          Var206_lev_NA 1.822398e-15
## 411                   Var206_lev_x.43pnToF 1.959477e-01
## 412                      Var206_lev_x.6JmL 1.236047e-02
## 413                      Var206_lev_x.hAFG 5.468636e-08
## 414                      Var206_lev_x.haYg 8.746431e-12
## 415                      Var206_lev_x.IYzP 3.285526e-23
## 416                      Var206_lev_x.kxE9 8.274335e-06
## 417                      Var206_lev_x.sYC_ 1.925194e-01
## 418                      Var206_lev_x.wMei 2.194114e-01
## 419                      Var206_lev_x.y6dw 7.699583e-14
## 420                      Var206_lev_x.zm5i 5.300895e-14
## 421                            Var206_catP 4.392588e-30
## 422                            Var206_catB 5.187663e-68
## 423       Var207_lev_x.7M47J5GA0pTYIFxg5uy 5.394318e-24
## 424 Var207_lev_x.DHn_WUyBhW_whjA88g9bvA64_ 2.898618e-08
## 425                Var207_lev_x.me75fM6ugJ 1.389566e-27
## 426                Var207_lev_x.NKv3VA1BpP 2.653449e-02
## 427                            Var207_catP 2.546858e-25
## 428                            Var207_catB 2.217665e-32
## 429                      Var208_lev_x.kIsH 1.448640e-01
## 430                      Var208_lev_x.sBgB 8.562110e-02
## 431                            Var208_catP 1.530395e-01
## 432                            Var208_catB 7.321903e-01
## 433                      Var210_lev_x.g5HH 2.862852e-16
## 434                      Var210_lev_x.uKAI 7.611924e-17
## 435                            Var210_catP 9.785209e-17
## 436                            Var210_catB 3.809536e-22
## 437                      Var211_lev_x.L84s 7.064698e-12
## 438                      Var211_lev_x.Mtgm 7.064698e-12
## 439             Var212_lev_x.4kVnq_T26xq1p 1.021609e-06
## 440                      Var212_lev_x.CrNX 8.613110e-08
## 441                   Var212_lev_x.Ie_5MZs 1.096606e-02
## 442                   Var212_lev_x.NhsEn4L 1.170222e-54
## 443             Var212_lev_x.XfqtO3UdzaXh_ 1.074954e-17
## 444                            Var212_catP 6.314134e-53
## 445                            Var212_catB 3.411730e-51
## 446                          Var213_lev_NA 8.505976e-06
## 447                      Var213_lev_x.KdSa 8.505976e-06
## 448                          Var214_lev_NA 9.625925e-29
## 449                        Var214_lev_rare 3.419952e-22
## 450                            Var214_catP 9.623338e-29
## 451                            Var214_catB 1.718490e-16
## 452                            Var215_catP 5.025347e-01
## 453                        Var216_lev_rare 2.882467e-03
## 454                   Var216_lev_x.11p4mKe 3.024677e-02
## 455                   Var216_lev_x.kZJtVhC 8.818825e-03
## 456                   Var216_lev_x.kZJyVg2 1.515818e-03
## 457                   Var216_lev_x.mAja5EA 1.151189e-02
## 458                   Var216_lev_x.mAjbk_S 3.523563e-01
## 459                   Var216_lev_x.NGZxnJM 3.831746e-02
## 460                   Var216_lev_x.XTbPUYD 5.534288e-15
## 461                            Var216_catP 2.257811e-01
## 462                            Var216_catB 2.563654e-60
## 463                        Var217_lev_rare 3.764304e-16
## 464                            Var217_catP 4.250294e-63
## 465                            Var217_catB 5.031841e-25
## 466                      Var218_lev_x.cJvF 1.064714e-28
## 467                      Var218_lev_x.UYBR 1.028586e-11
## 468                            Var218_catP 2.418266e-55
## 469                            Var218_catB 3.577217e-65
## 470                          Var219_lev_NA 3.455312e-01
## 471                   Var219_lev_x.AU8pNoi 3.901751e-03
## 472                      Var219_lev_x.FzaX 1.787239e-02
## 473                            Var219_catP 8.744379e-03
## 474                            Var219_catB 4.589762e-02
## 475                        Var220_lev_rare 3.059497e-06
## 476                   Var220_lev_x.4UxGlow 1.586589e-20
## 477                   Var220_lev_x.UF16siJ 1.178434e-02
## 478                            Var220_catP 9.978122e-23
## 479                            Var220_catB 7.173122e-20
## 480                   Var221_lev_x.d0EEeJi 2.329916e-08
## 481                      Var221_lev_x.oslk 1.188016e-20
## 482                   Var221_lev_x.QKW8DRm 1.699119e-06
## 483                      Var221_lev_x.zCkv 1.187931e-08
## 484                            Var221_catP 1.571828e-20
## 485                            Var221_catB 1.473679e-22
## 486                        Var222_lev_rare 3.059497e-06
## 487                   Var222_lev_x.APgdzOv 1.178434e-02
## 488                   Var222_lev_x.catzS2D 1.586589e-20
## 489                            Var222_catP 9.978122e-23
## 490                            Var222_catB 7.173122e-20
## 491                          Var223_lev_NA 3.455312e-01
## 492                            Var223_catP 9.237290e-01
## 493                            Var223_catB 4.394179e-02
## 494                          Var224_lev_NA 2.014521e-02
## 495                          Var225_lev_NA 2.137222e-29
## 496                      Var225_lev_x.ELof 4.193752e-22
## 497                      Var225_lev_x.kG3k 4.614202e-03
## 498                      Var225_lev_x.xG3x 4.125449e-03
## 499                            Var225_catP 4.506149e-28
## 500                            Var225_catB 2.921872e-31
## 501                      Var226_lev_x.5Acm 4.555151e-01
## 502                      Var226_lev_x.7aLG 2.861099e-01
## 503                      Var226_lev_x.7P5s 4.617143e-07
## 504                      Var226_lev_x.Aoh3 1.826908e-01
## 505                      Var226_lev_x.FSa2 1.918030e-10
## 506                      Var226_lev_x.kwS7 1.599428e-01
## 507                      Var226_lev_x.me1d 3.786301e-03
## 508                      Var226_lev_x.PM2D 6.991124e-02
## 509                      Var226_lev_x.Qcbd 1.312678e-01
## 510                      Var226_lev_x.Qu4f 3.602696e-02
## 511                      Var226_lev_x.rgKb 3.396179e-01
## 512                      Var226_lev_x.szEZ 2.278658e-04
## 513                      Var226_lev_x.TNEC 3.447014e-01
## 514                      Var226_lev_x.uWr3 1.991424e-02
## 515                      Var226_lev_x.Xa3G 1.390213e-02
## 516                            Var226_catP 4.519958e-06
## 517                            Var226_catB 3.080169e-15
## 518                   Var227_lev_x.02N6s8f 2.410784e-03
## 519                      Var227_lev_x.6fzt 2.948771e-01
## 520                   Var227_lev_x.nIGXDli 4.602414e-04
## 521                      Var227_lev_x.RAYp 2.927722e-27
## 522                      Var227_lev_x.ZI9m 1.825722e-24
## 523                            Var227_catP 6.712252e-26
## 524                            Var227_catB 1.253216e-31
## 525                   Var228_lev_x.55YFVY9 9.148369e-10
## 526             Var228_lev_x.F2FyR07IdsN7I 2.261755e-42
## 527             Var228_lev_x.ib5G6X1eUxUn6 8.780686e-13
## 528                Var228_lev_x.iyHGyLCEkQ 2.923118e-09
## 529          Var228_lev_x.R4y5gQQWY8OodqDV 5.551966e-05
## 530       Var228_lev_x.TCU50_Yjmm6GIBZ0lL_ 4.796431e-08
## 531                            Var228_catP 1.055705e-41
## 532                            Var228_catB 2.522467e-43
## 533                          Var229_lev_NA 3.635024e-38
## 534                      Var229_lev_x.am7c 4.398543e-16
## 535                      Var229_lev_x.mj86 2.190401e-14
## 536                            Var229_catP 4.670404e-38
## 537                            Var229_catB 2.832005e-36
# Recode the training outcome as logical: TRUE where it equals yTarget.
treatedTrainM[[yName]] <- (treatedTrainM[[yName]] == yTarget)

# Apply the treatment plan to the evaluation rows, pruning derived variables
# at the same significance cutoff, then recode the outcome the same way.
treatedTest <- prepare(treatmentsC, dTest,
                       pruneSig = kddSig,
                       parallelCluster = cl)
treatedTest[[yName]] <- (treatedTest[[yName]] == yTarget)


# The worker cluster (if one was started) is not needed past this point.
if (!is.null(cl)) {
  parallel::stopCluster(cl)
  cl <- NULL
}
# Run other models (with proper coding/training separation).
#
# This gets us back to AUC 0.72

#print(selvars)

# Plotting frames: start with only the outcome column; each model's
# predictions on the test rows are added as a column named after the model.
treatedTrainP <- treatedTrainM[, yName, drop = FALSE]
treatedTestP <- treatedTest[, yName, drop = FALSE]


# Model formula "<outcome> ~ v1 + v2 + ..." over the selected variables.
formulaS <- paste(yName, paste(selvars, collapse = ' + '), sep = ' ~ ')
for (mname in c('glmPred', 'gbmPred')) {
  print("*****************************")
  print(date())
  print(paste(mname, length(selvars)))
  if (mname != 'gbmPred') {
    # Cross-validated glmnet (alpha = 0.5) on the selected columns.
    modelglms <- cv.glmnet(
      x = as.matrix(treatedTrainM[, selvars, drop = FALSE]),
      y = treatedTrainM[[yName]],
      alpha = 0.5,
      family = 'binomial'
    )
    #print(summary(modelglms))
    treatedTestP[[mname]] <- as.numeric(predict(
      modelglms,
      newx = as.matrix(treatedTest[, selvars, drop = FALSE]),
      type = 'response'
    ))
  } else {
    # Gradient boosting with 5-fold CV; gbm.perf() supplies the number of
    # trees used for prediction.
    modelGBMs <- gbm(
      as.formula(formulaS),
      data = treatedTrainM,
      distribution = 'bernoulli',
      n.trees = 1000,
      interaction.depth = 3,
      keep.data = FALSE,
      cv.folds = 5
    )
    #print(modelGBMs)
    #print(summary(modelGBMs))
    nTrees <- gbm.perf(modelGBMs)
    treatedTestP[[mname]] <- predict(
      modelGBMs,
      newdata = treatedTest,
      type = 'response',
      n.trees = nTrees
    )
  }

  # Diagnostic plots of this model's scores on the test data.
  t2 <- paste(mname, 'test data')
  print(DoubleDensityPlot(treatedTestP, mname, yName, title = t2))
  print(ROCPlot(treatedTestP, mname, yName, yTarget, title = t2))
  print(PRPlot(treatedTestP, mname, yName, yTarget, title = t2))
  print(date())
  print("*****************************")
}
## [1] "*****************************"
## [1] "Mon Sep 26 16:24:27 2016"
## [1] "glmPred 312"