Cross-validated models: five repetitions of a typical test/train split.

load('csteps.Rdata')
source('Lfns.R')
source('Afns.R')
## Loading required package: ggplot2
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: reshape2
## Loading required package: ROCR
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Loading required package: plyr
## Loading required package: stringr
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loaded gbm 2.1.1
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:randomForest':
## 
##     combine
## 
## The following objects are masked from 'package:data.table':
## 
##     between, last
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:survival':
## 
##     cluster
# Work-around: ddply has problems when input and output column names match.
# Columns are therefore produced with a '.mean' suffix, and that suffix is
# stripped afterwards; killMean is the pattern matching the trailing '.mean'.
killMean <- '\\.mean$'

# Remove the text matched by regexpToZap from every column name of d that
# matches it.
#
# d           : data frame (or list) whose column names are to be cleaned.
# regexpToZap : regular expression; every match inside a matching column
#               name is deleted.
#
# Returns d with each matching column copied under its cleaned name and the
# original column dropped. Renamed columns therefore end up after the
# untouched ones, and an existing column that already carries the cleaned
# name is overwritten. If nothing matches, d is returned unchanged.
changeColNames <- function(d, regexpToZap) {
  matchedNames <- grep(regexpToZap, colnames(d), value = TRUE)
  for (oldName in matchedNames) {
    cleanedName <- gsub(regexpToZap, '', oldName)
    d[[cleanedName]] <- d[[oldName]]
    # assigning NULL drops the original column
    d[[oldName]] <- NULL
  }
  d
}



# #  95% confidence interval from a fitted normal distribution
# crossValFS <- ddply(crossValF,.(model),summarize,
#       cvtrain.ndeviance.var=var(train.ndeviance),
#       cvtrain.ndeviance.mean=mean(train.ndeviance),
#       cvtrain.auc.var=var(train.auc),
#       cvtrain.auc.mean=mean(train.auc),
#       cvtest.ndeviance.var=var(test.ndeviance),
#       cvtest.ndeviance.mean=mean(test.ndeviance),
#       cvtest.auc.var=var(test.auc),
#       cvtest.auc.mean=mean(test.auc))
# crossValFS <- changeColNames(crossValFS,killMean)
# for(col in c('cvtrain.ndeviance', 'cvtrain.auc',
#              'cvtest.ndeviance', 'cvtest.auc')) {
#   crossValFS[[paste(col,'lW',sep='.')]] <- qnorm(0.025,
#         mean=crossValFS[[col]],
#         sd=sqrt(crossValFS[[paste(col,'var',sep='.')]]))
#   crossValFS[[paste(col,'uW',sep='.')]] <-  qnorm(1-0.025,
#         mean=crossValFS[[col]],
#         sd=sqrt(crossValFS[[paste(col,'var',sep='.')]]))
#   crossValFS[[paste(col,'var',sep='.')]] <- c()
# }

# Per-model summary of the cross-validation results in crossValF: for each
# train/test metric, the empirical minimum (.lW), the mean, and the empirical
# maximum (.uW). The means are computed under a '.mean' suffix, which
# changeColNames() then strips via the killMean pattern.
crossValFS <- changeColNames(
  ddply(
    crossValF, .(model), summarize,
    cvtrain.ndeviance.lW = min(train.ndeviance),
    cvtrain.ndeviance.mean = mean(train.ndeviance),
    cvtrain.ndeviance.uW = max(train.ndeviance),
    cvtrain.auc.lW = min(train.auc),
    cvtrain.auc.mean = mean(train.auc),
    cvtrain.auc.uW = max(train.auc),
    cvtest.ndeviance.lW = min(test.ndeviance),
    cvtest.ndeviance.mean = mean(test.ndeviance),
    cvtest.ndeviance.uW = max(test.ndeviance),
    cvtest.auc.lW = min(test.auc),
    cvtest.auc.mean = mean(test.auc),
    cvtest.auc.uW = max(test.auc)
  ),
  killMean
)

# Show the summary table.
print(crossValFS)
##                             model cvtrain.ndeviance.lW
## 1      best single variable model            0.5119605
## 2 elastic net logistic regression            0.4828348
## 3         GAM logistic regression            0.4744629
## 4                             gbm            0.5025301
## 5             logistic regression            0.4762358
## 6                      null model            0.5203885
## 7                   random forest            0.1173837
##   cvtrain.ndeviance.uW cvtrain.auc.lW cvtrain.auc.uW cvtest.ndeviance.lW
## 1            0.5190056      0.5592328      0.5754775           0.5013121
## 2            0.4997612      0.6907118      0.7212703           0.4752461
## 3            0.4905575      0.6975658      0.7318626           0.4682064
## 4            0.5129862      0.6830808      0.7241729           0.4935722
## 5            0.4918602      0.6952648      0.7280958           0.4716966
## 6            0.5260990      0.5000000      0.5000000           0.5125123
## 7            0.3536761      0.9247529      0.9999999           0.4736492
##   cvtest.ndeviance.uW cvtest.auc.lW cvtest.auc.uW cvtrain.ndeviance
## 1           0.5352311     0.5633464     0.5855272         0.5148990
## 2           0.5116335     0.6750312     0.7170742         0.4906506
## 3           0.5031028     0.6914669     0.7323716         0.4807271
## 4           0.5283116     0.6792729     0.7421085         0.5069347
## 5           0.5027110     0.6834864     0.7207276         0.4829794
## 6           0.5430504     0.5000000     0.5000000         0.5239332
## 7           0.7007900     0.5998270     0.7273010         0.2246288
##   cvtrain.auc cvtest.ndeviance cvtest.auc
## 1   0.5700793        0.5145496  0.5766845
## 2   0.7056507        0.4939078  0.6983311
## 3   0.7179343        0.4867967  0.7096587
## 4   0.7063694        0.5088182  0.7020477
## 5   0.7128119        0.4882532  0.7036908
## 6   0.5000000        0.5251227  0.5000000
## 7   0.9734611        0.5839543  0.6530627
# Print whatever plotResultRanges() returns for the per-model summary table,
# called with plotRanges=TRUE. plotResultRanges is not defined in this chunk
# (presumably it comes from Lfns.R or Afns.R -- TODO confirm); the printed
# result has elements named AUC and normalized.deviance.
print(plotResultRanges(crossValFS,plotRanges=TRUE))
## $AUC

## 
## $normalized.deviance