Cross-validated models: five repetitions of a typical test/train split.
load('csteps.Rdata')
source('Lfns.R')
source('Afns.R')
## Loading required package: ggplot2
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: reshape2
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
##
## Loading required package: plyr
## Loading required package: stringr
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loaded gbm 2.1.1
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:randomForest':
##
## combine
##
## The following objects are masked from 'package:data.table':
##
## between, last
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:survival':
##
## cluster
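# ddply has problems when input and output column names match.
# Workaround: summarize into columns carrying a '.mean' suffix, then strip
# that suffix afterwards with changeColNames(d, killMean).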
killMean <- '\\.mean$'
# Remove a regular-expression match from the names of matching columns.
#
# Every column of d whose name matches regexpToZap is copied to a column whose
# name is the old name with all matches removed (gsub), and the old column is
# then dropped. As a consequence a renamed column ends up after the untouched
# columns; if a column with the new name already exists it is overwritten in
# place.
#
# Arguments:
#   d            data frame whose columns are to be renamed.
#   regexpToZap  regular expression to delete from the column names.
# Returns:
#   d with the affected columns renamed; d is returned unchanged when no
#   column name matches.
changeColNames <- function(d,regexpToZap) {
  toFix <- grep(regexpToZap,colnames(d))
  if(length(toFix)>0) {
    cols <- colnames(d)[toFix]
    for(ci in cols) {
      newName <- gsub(regexpToZap,'',ci)
      # A pattern that can match the empty string (e.g. 'x*') selects a column
      # without changing its name. Copying the column onto itself and then
      # dropping it would silently delete the data, so skip no-op renames.
      if(identical(newName,ci)) {
        next
      }
      d[[newName]] <- d[[ci]]
      # Assigning NULL removes the old column.
      d[[ci]] <- NULL
    }
  }
  d
}
# # 95% confidence interval from fit normal distribution
# crossValFS <- ddply(crossValF,.(model),summarize,
# cvtrain.ndeviance.var=var(train.ndeviance),
# cvtrain.ndeviance.mean=mean(train.ndeviance),
# cvtrain.auc.var=var(train.auc),
# cvtrain.auc.mean=mean(train.auc),
# cvtest.ndeviance.var=var(test.ndeviance),
# cvtest.ndeviance.mean=mean(test.ndeviance),
# cvtest.auc.var=var(test.auc),
# cvtest.auc.mean=mean(test.auc))
# crossValFS <- changeColNames(crossValFS,killMean)
# for(col in c('cvtrain.ndeviance', 'cvtrain.auc',
# 'cvtest.ndeviance', 'cvtest.auc')) {
# crossValFS[[paste(col,'lW',sep='.')]] <- qnorm(0.025,
# mean=crossValFS[[col]],
# sd=sqrt(crossValFS[[paste(col,'var',sep='.')]]))
# crossValFS[[paste(col,'uW',sep='.')]] <- qnorm(1-0.025,
# mean=crossValFS[[col]],
# sd=sqrt(crossValFS[[paste(col,'var',sep='.')]]))
# crossValFS[[paste(col,'var',sep='.')]] <- c()
# }
# empirical min/max
# Per-model summary of the cross-validation results: for each value of `model`
# compute the minimum (.lW), mean (.mean) and maximum (.uW) of the train/test
# normalized deviance and AUC columns.
# crossValF is not defined in this chunk -- presumably it comes from
# csteps.Rdata loaded above; confirm.
# NOTE(review): the package load log shows dplyr masking plyr's summarize, so
# the bare `summarize` passed to ddply resolves to whichever is first on the
# search path -- confirm that is the intended one.
crossValFS <- ddply(crossValF,.(model),summarize,
cvtrain.ndeviance.lW=min(train.ndeviance),
cvtrain.ndeviance.mean=mean(train.ndeviance),
cvtrain.ndeviance.uW=max(train.ndeviance),
cvtrain.auc.lW=min(train.auc),
cvtrain.auc.mean=mean(train.auc),
cvtrain.auc.uW=max(train.auc),
cvtest.ndeviance.lW=min(test.ndeviance),
cvtest.ndeviance.mean=mean(test.ndeviance),
cvtest.ndeviance.uW=max(test.ndeviance),
cvtest.auc.lW=min(test.auc),
cvtest.auc.mean=mean(test.auc),
cvtest.auc.uW=max(test.auc))
# Strip the '.mean' suffix so the mean columns carry the bare metric name
# (e.g. cvtest.auc); changeColNames re-adds them, so they move to the end of
# the data frame, as the printed output shows.
crossValFS <- changeColNames(crossValFS,killMean)
print(crossValFS)
## model cvtrain.ndeviance.lW
## 1 best single variable model 0.5119605
## 2 elastic net logistic regression 0.4828348
## 3 GAM logistic regression 0.4744629
## 4 gbm 0.5025301
## 5 logistic regression 0.4762358
## 6 null model 0.5203885
## 7 random forest 0.1173837
## cvtrain.ndeviance.uW cvtrain.auc.lW cvtrain.auc.uW cvtest.ndeviance.lW
## 1 0.5190056 0.5592328 0.5754775 0.5013121
## 2 0.4997612 0.6907118 0.7212703 0.4752461
## 3 0.4905575 0.6975658 0.7318626 0.4682064
## 4 0.5129862 0.6830808 0.7241729 0.4935722
## 5 0.4918602 0.6952648 0.7280958 0.4716966
## 6 0.5260990 0.5000000 0.5000000 0.5125123
## 7 0.3536761 0.9247529 0.9999999 0.4736492
## cvtest.ndeviance.uW cvtest.auc.lW cvtest.auc.uW cvtrain.ndeviance
## 1 0.5352311 0.5633464 0.5855272 0.5148990
## 2 0.5116335 0.6750312 0.7170742 0.4906506
## 3 0.5031028 0.6914669 0.7323716 0.4807271
## 4 0.5283116 0.6792729 0.7421085 0.5069347
## 5 0.5027110 0.6834864 0.7207276 0.4829794
## 6 0.5430504 0.5000000 0.5000000 0.5239332
## 7 0.7007900 0.5998270 0.7273010 0.2246288
## cvtrain.auc cvtest.ndeviance cvtest.auc
## 1 0.5700793 0.5145496 0.5766845
## 2 0.7056507 0.4939078 0.6983311
## 3 0.7179343 0.4867967 0.7096587
## 4 0.7063694 0.5088182 0.7020477
## 5 0.7128119 0.4882532 0.7036908
## 6 0.5000000 0.5251227 0.5000000
## 7 0.9734611 0.5839543 0.6530627
# Plot the per-model summary with its min/max ranges.
# plotResultRanges is not defined in this chunk -- presumably it comes from
# Lfns.R or Afns.R sourced above; confirm. The printed result below has
# elements named AUC and normalized.deviance.
print(plotResultRanges(crossValFS,plotRanges=TRUE))
## $AUC
##
## $normalized.deviance