# Actual example
# isolet data from https://archive.ics.uci.edu/ml/datasets/ISOLET
library('ggplot2')
library('reshape2')
library('gbm')
## Loading required package: survival
## Loading required package: splines
## Loading required package: lattice
## Loading required package: parallel
## Loaded gbm 2.1
library('ROCR')
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
# Read the training file (folds 1-4) and the test file (fold 5), tagging every
# row with the split it came from before stacking them into one frame.
dTrain <- read.table("isolet1+2+3+4.data.gz",
                     header = FALSE, sep = ',',
                     stringsAsFactors = FALSE, blank.lines.skip = TRUE)
dTrain[['isTest']] <- FALSE
dTest <- read.table("isolet5.data.gz",
                    header = FALSE, sep = ',',
                    stringsAsFactors = FALSE, blank.lines.skip = TRUE)
dTest[['isTest']] <- TRUE
d <- rbind(dTrain, dTest)
rm(dTrain, dTest)

# The first 617 columns are the model inputs; column 618 is an index into the
# alphabet, which is converted here to the letter itself.
vars <- colnames(d)[1:617]
yColumn <- 'isLetter'
d[['V618']] <- letters[d[['V618']]]

# Keep only the 'm' and 'n' rows and define the outcome as "is an 'n'".
d <- d[is.element(d[['V618']], c('m', 'n')), , drop = FALSE]
d[[yColumn]] <- d[['V618']] == 'n'

# Model formula as a string: outcome ~ V1 + V2 + ... + V617
formula <- paste(yColumn, '~', paste(vars, collapse = ' + '))
# define some helper and reporting functions
# Calculate the area under the piecewise-linear curve through the points (x, y)
# using the trapezoid rule.
# Expects length(x) == length(y), y >= 0, 0 <= x <= 1 and x increasing.
areaCalc <- function(x,y) {
   # pad with the corner points (0,0) and (1,1) to get rid of degenerate cases
   xs <- c(0, x, 1)
   ys <- c(0, y, 1)
   last <- length(ys)
   # mean height of each segment times the width of that segment
   segmentHeights <- 0.5 * (ys[-1] + ys[-last])
   segmentWidths <- diff(xs)
   sum(segmentHeights * segmentWidths)
}

# Build a gain-curve plot for a scoring model.
#   truthcol: outcome per item (coerced with as.numeric)
#   predcol:  model score per item
#   title:    text included in the plot title
# Returns a ggplot object comparing the cumulative outcome captured when items
# are sorted by model score ("model") against sorting by the outcome itself
# ("wizard"). The title reports the ratio of the two areas above the diagonal.
gainCurve <- function(truthcol, predcol, title) {
  # one row per item: its score and its numeric outcome
  scored <- data.frame(predcol = predcol, truthcol = as.numeric(truthcol))
  outcome <- scored[['truthcol']]
  totalOutcome <- sum(outcome)
  npop <- nrow(scored)

  # row orderings, highest first: by model score and by the true outcome
  byModel <- order(scored[['predcol']], decreasing = TRUE)
  byTruth <- order(outcome, decreasing = TRUE)

  # cumulative share of the outcome captured as a function of the fraction
  # of the population considered, under each ordering
  curves <- data.frame(pctpop = (1:npop)/npop,
                       model = cumsum(outcome[byModel])/totalOutcome,
                       wizard = cumsum(outcome[byTruth])/totalOutcome)

  # area of each curve above the diagonal; their ratio is the normalized
  # Gini score (the plain Gini score would be 2 * (area - 0.5))
  modelArea <- areaCalc(curves$pctpop, curves$model) - 0.5
  idealArea <- areaCalc(curves$pctpop, curves$wizard) - 0.5
  giniScore <- modelArea/idealArea

  # tall form for plotting: one row per (pctpop, sort criterion)
  tall <- melt(curves, id.vars = "pctpop",
               measure.vars = c("model", "wizard"),
               variable.name = "sort_criterion", value.name = "pct_outcome")
  heading <- paste("Gain curve,", title, '\n',
                   'relative Gini score', format(giniScore, digits = 2))

  ggplot(data = tall,
         aes(x = pctpop, y = pct_outcome, color = sort_criterion)) +
    geom_point() +
    geom_line() +
    geom_abline(color = "gray") +
    ggtitle(heading) +
    xlab("% items in score order") +
    ylab("% total category") +
    scale_x_continuous(breaks = seq(0, 1, 0.1)) +
    scale_y_continuous(breaks = seq(0, 1, 0.1)) +
    scale_color_manual(values = c('model' = 'darkblue', 'wizard' = 'darkgreen'))
}


# Build an ROC plot with ROCR and ggplot2.
#   title:   text for the plot title (the AUC is appended on a second line)
#   outcol:  true outcome per item
#   predcol: model score per item
# Returns a list with pf (data frame of false/true positive rates) and
# plot (the ggplot object).
plotROC <- function(title,outcol,predcol) {
  rocrPred <- prediction(predcol, outcol)
  rocCurve <- performance(rocrPred, 'tpr', 'fpr')
  aucValue <- as.numeric(performance(rocrPred, 'auc')@y.values)

  # points of the ROC curve
  pf <- data.frame(
    FalsePositiveRate = rocCurve@x.values[[1]],
    TruePositiveRate = rocCurve@y.values[[1]])

  heading <- paste(title,'\nAUC:',format(aucValue,digits=2))
  shadedArea <- geom_ribbon(
    data = pf,
    aes(x = FalsePositiveRate, ymax = TruePositiveRate, ymin = 0),
    fill = 'blue', alpha = 0.3)
  curvePoints <- geom_point(
    data = pf,
    aes(x = FalsePositiveRate, y = TruePositiveRate))
  # diagonal reference line from (0,0) to (1,1)
  diagonal <- geom_line(aes(x=c(0,1),y=c(0,1)))

  rocPlot <- ggplot() + shadedArea + curvePoints + diagonal +
    coord_fixed() + ggtitle(heading)
  list(pf = pf, plot = rocPlot)
}


# Deviance (-2 * log-likelihood) of probability predictions against a
# true/false outcome.
#   truth:   outcome per item, used as the condition of ifelse()
#   pred:    predicted probability of truth being TRUE (recycled if shorter)
#   epsilon: predictions are clipped into [epsilon, 1-epsilon] before the log
# Note: defined at top level under the same name as stats::deviance.
deviance <- function(truth,pred,epsilon=0) {
  clipped <- pmin(pmax(pred, epsilon), 1-epsilon)
  saturated <- 0.0 # assumed log-likelihood of saturated model
  # probability the prediction assigned to the outcome that actually happened
  probOfObserved <- ifelse(truth, clipped, 1-clipped)
  -2*(sum(log(probOfObserved))-saturated)
}


# Print accuracy and the fraction of deviance explained for one prediction
# column, on either the training or the test rows.
#   d:         data frame with an isTest column, the outcome column named by
#              the global yColumn, and the prediction column modelName
#   test:      TRUE to report on rows with isTest TRUE, FALSE for the others
#   modelName: name of the column holding predicted probabilities
#   title:     unused here; kept so existing callers keep working
#   epsilon:   clipping bound handed to deviance()
# Returns (invisibly) the last printed string.
reportStats <- function(d,test,modelName,title,epsilon) {
  dSub <- d[d$isTest==test,,drop=FALSE]
  truth <- dSub[,yColumn]
  pred <- dSub[,modelName]
  # Accuracy is computed straight from the vectors rather than from a
  # confusion table: the table is not 2x2 when every prediction falls on one
  # side of 0.5 (or only one class is present), and indexing [2,2] then fails.
  # na.rm=TRUE matches table(), which ignores rows containing NA.
  accuracy <- mean(truth == (pred>0.5), na.rm=TRUE)
  note <- if (test) 'test' else 'train'
  print(paste('\t',note,'accuracy',modelName,format(accuracy,digits=2)))
  residual.deviance <- deviance(truth,pred,epsilon)
  # null model: predict the observed outcome rate for every row
  null.deviance <- deviance(truth,mean(truth),epsilon)
  print(paste("\tmodel explained a",
              format((1-residual.deviance/null.deviance),digits=2),
            "fraction of the variation on",note))
}

# Print the full report for one prediction column: train and test statistics,
# the test-set score density by outcome, then ROC plots and gain curves for
# the training and test rows.
#   d:         data frame with isTest, the outcome column named by the global
#              yColumn, and the prediction column modelName
#   modelName: name of the prediction column
#   title:     label used in the printed header and the plot titles
#   epsilon:   clipping bound passed through to reportStats()
report <- function(d,modelName,title,epsilon=1.0e-2) {
  banner <- "***********"
  trainRows <- d[d$isTest==FALSE,,drop=FALSE]
  testRows <- d[d$isTest==TRUE,,drop=FALSE]
  trainTitle <- paste(title,'train')
  testTitle <- paste(title,'test')

  print(banner)
  print(paste("model",modelName,title))
  reportStats(d,FALSE,modelName,title,epsilon)
  reportStats(d,TRUE,modelName,title,epsilon)

  # distribution of test-set scores, one density per outcome value
  scoreDensity <- ggplot(data=testRows,
                         aes_string(x=modelName,color=yColumn)) +
    geom_density() +
    ggtitle(testTitle)
  print(scoreDensity)

  print(plotROC(trainTitle,
                trainRows[,yColumn],
                trainRows[,modelName])$plot)
  print(plotROC(testTitle,
                testRows[,yColumn],
                testRows[,modelName])$plot)

  print(gainCurve(trainRows[,yColumn],
                  trainRows[,modelName],
                  trainTitle))
  print(gainCurve(testRows[,yColumn],
                  testRows[,modelName],
                  testTitle))
  print(banner)
}
# do the GBM modeling
#help(gbm)
# modelGBM <- gbm.fit(x=d[!d$isTest,vars,drop=FALSE],
#                     y=d[!d$isTest,yColumn],
#          distribution='bernoulli',
#          n.trees=400,
#          interaction.depth=3,
#          shrinkage=0.05,
#          bag.fraction=0.5,
#          keep.data=FALSE)
# Fit the boosted-tree classifier on the training rows, with 5-fold
# cross-validation so that gbm.perf() can pick the number of trees afterwards.
# Only the outcome and the predictor columns are handed to gbm(). The frame
# also carries the character column V618 and the isTest flag, which the
# formula never uses, and the cross-validation prediction step tries to coerce
# every non-response column it is given to numeric.
modelGBM <- gbm(as.formula(formula),
                data=d[!d$isTest,c(yColumn,vars),drop=FALSE],
                distribution='bernoulli',
                n.trees=400,
                interaction.depth=3,
                shrinkage=0.05,
                bag.fraction=0.5,
                keep.data=FALSE,
                cv.folds=5)
# NOTE: the recorded run that passed the whole frame (all columns) printed one
# "NAs introduced by coercion" warning from predict.gbm per cross-validation
# fold (five in total).
# Ask gbm.perf() for the number of trees to use; the value is passed as
# n.trees when predicting below. The recorded message shows the cv method
# was used.
nTrees <- gbm.perf(modelGBM)
## Using cv method...

print(nTrees)
## [1] 244
# Print the per-variable relative influence table (recorded output follows).
print(summary(modelGBM))

##       var      rel.inf
## V298 V298 3.836308e+01
## V297 V297 1.578034e+01
## V106 V106 5.901288e+00
## V138 V138 5.546794e+00
## V201 V201 4.198805e+00
## V295 V295 2.506710e+00
## V107 V107 1.883953e+00
## V76   V76 1.296850e+00
## V137 V137 1.258110e+00
## V139 V139 1.076519e+00
## V170 V170 9.771862e-01
## V294 V294 9.577975e-01
## V133 V133 8.033338e-01
## V105 V105 6.181317e-01
## V230 V230 6.084567e-01
## V202 V202 5.953141e-01
## V226 V226 5.832905e-01
## V35   V35 5.724079e-01
## V140 V140 5.705139e-01
## V303 V303 5.357836e-01
## V296 V296 5.165927e-01
## V169 V169 4.871241e-01
## V112 V112 4.850578e-01
## V172 V172 4.613469e-01
## V117 V117 4.526753e-01
## V233 V233 4.341102e-01
## V6     V6 4.252697e-01
## V264 V264 4.230251e-01
## V189 V189 3.403965e-01
## V304 V304 3.140491e-01
## V167 V167 3.028081e-01
## V268 V268 2.980051e-01
## V400 V400 2.934980e-01
## V166 V166 2.890254e-01
## V171 V171 2.735667e-01
## V44   V44 2.708589e-01
## V73   V73 2.671624e-01
## V439 V439 2.540217e-01
## V182 V182 2.451673e-01
## V404 V404 2.396288e-01
## V609 V609 2.143637e-01
## V293 V293 2.011158e-01
## V235 V235 1.979022e-01
## V15   V15 1.954471e-01
## V208 V208 1.874065e-01
## V104 V104 1.871303e-01
## V474 V474 1.727848e-01
## V41   V41 1.704464e-01
## V528 V528 1.696884e-01
## V118 V118 1.616407e-01
## V150 V150 1.575796e-01
## V614 V614 1.479362e-01
## V407 V407 1.461825e-01
## V575 V575 1.351830e-01
## V85   V85 1.347899e-01
## V219 V219 1.342542e-01
## V257 V257 1.319264e-01
## V94   V94 1.278885e-01
## V80   V80 1.218218e-01
## V210 V210 1.204988e-01
## V317 V317 1.199896e-01
## V594 V594 1.159858e-01
## V432 V432 1.159573e-01
## V45   V45 1.153940e-01
## V232 V232 1.123445e-01
## V64   V64 1.077257e-01
## V116 V116 1.036188e-01
## V203 V203 9.267854e-02
## V206 V206 9.221169e-02
## V544 V544 9.216249e-02
## V191 V191 8.568345e-02
## V215 V215 8.404623e-02
## V156 V156 8.210674e-02
## V176 V176 7.946683e-02
## V135 V135 7.861104e-02
## V209 V209 7.446930e-02
## V318 V318 7.386898e-02
## V213 V213 7.010591e-02
## V319 V319 6.991659e-02
## V536 V536 6.926823e-02
## V271 V271 6.911332e-02
## V146 V146 6.822918e-02
## V188 V188 6.815720e-02
## V596 V596 6.668532e-02
## V367 V367 6.627027e-02
## V299 V299 6.622248e-02
## V154 V154 6.590646e-02
## V113 V113 6.511545e-02
## V229 V229 6.442286e-02
## V516 V516 6.107468e-02
## V134 V134 5.781824e-02
## V613 V613 5.697947e-02
## V13   V13 5.231616e-02
## V310 V310 5.122566e-02
## V173 V173 4.845133e-02
## V573 V573 4.823950e-02
## V330 V330 4.732705e-02
## V262 V262 4.537872e-02
## V590 V590 4.465950e-02
## V119 V119 4.446114e-02
## V340 V340 4.232136e-02
## V269 V269 4.106261e-02
## V82   V82 4.081133e-02
## V225 V225 4.030448e-02
## V220 V220 4.020517e-02
## V481 V481 3.917175e-02
## V12   V12 3.888136e-02
## V222 V222 3.863264e-02
## V120 V120 3.775098e-02
## V462 V462 3.761068e-02
## V477 V477 3.722407e-02
## V520 V520 3.707339e-02
## V342 V342 3.694069e-02
## V93   V93 3.567629e-02
## V280 V280 3.530910e-02
## V541 V541 3.432784e-02
## V66   V66 3.272696e-02
## V408 V408 3.038672e-02
## V291 V291 2.936194e-02
## V518 V518 2.902651e-02
## V236 V236 2.893540e-02
## V141 V141 2.882983e-02
## V610 V610 2.834039e-02
## V162 V162 2.788499e-02
## V510 V510 2.780184e-02
## V102 V102 2.774334e-02
## V252 V252 2.720747e-02
## V368 V368 2.717260e-02
## V574 V574 2.669692e-02
## V500 V500 2.529031e-02
## V556 V556 2.511347e-02
## V175 V175 2.499103e-02
## V200 V200 2.484033e-02
## V144 V144 2.461939e-02
## V108 V108 2.354822e-02
## V373 V373 2.352200e-02
## V399 V399 2.217063e-02
## V155 V155 2.127506e-02
## V174 V174 2.050360e-02
## V608 V608 1.985939e-02
## V165 V165 1.982374e-02
## V576 V576 1.945087e-02
## V617 V617 1.917949e-02
## V557 V557 1.910745e-02
## V395 V395 1.885743e-02
## V77   V77 1.820594e-02
## V123 V123 1.733701e-02
## V50   V50 1.726149e-02
## V441 V441 1.690007e-02
## V180 V180 1.561152e-02
## V394 V394 1.560868e-02
## V301 V301 1.548682e-02
## V371 V371 1.508829e-02
## V129 V129 1.463915e-02
## V183 V183 1.460050e-02
## V177 V177 1.443720e-02
## V465 V465 1.374388e-02
## V473 V473 1.313374e-02
## V186 V186 1.281594e-02
## V338 V338 1.247096e-02
## V227 V227 1.232974e-02
## V588 V588 1.220188e-02
## V46   V46 1.193361e-02
## V99   V99 1.185373e-02
## V501 V501 1.170851e-02
## V197 V197 1.152924e-02
## V100 V100 1.132369e-02
## V376 V376 1.120752e-02
## V372 V372 1.105236e-02
## V71   V71 1.038181e-02
## V199 V199 1.032164e-02
## V542 V542 9.886172e-03
## V478 V478 9.736604e-03
## V96   V96 9.473456e-03
## V374 V374 9.384227e-03
## V198 V198 8.580589e-03
## V78   V78 8.320765e-03
## V70   V70 8.259775e-03
## V562 V562 8.082981e-03
## V20   V20 8.042169e-03
## V196 V196 7.980386e-03
## V237 V237 7.918937e-03
## V433 V433 7.758585e-03
## V260 V260 7.758184e-03
## V270 V270 7.648742e-03
## V159 V159 7.521886e-03
## V600 V600 7.490296e-03
## V436 V436 7.353690e-03
## V214 V214 7.083127e-03
## V132 V132 7.021418e-03
## V416 V416 6.924295e-03
## V142 V142 6.714537e-03
## V484 V484 6.655722e-03
## V521 V521 6.655321e-03
## V207 V207 6.643425e-03
## V506 V506 6.428050e-03
## V31   V31 6.328331e-03
## V245 V245 6.294517e-03
## V397 V397 6.235753e-03
## V246 V246 6.105189e-03
## V530 V530 6.091142e-03
## V178 V178 6.066037e-03
## V127 V127 5.851233e-03
## V364 V364 5.811155e-03
## V231 V231 5.624239e-03
## V7     V7 5.173290e-03
## V86   V86 4.979183e-03
## V3     V3 4.851585e-03
## V379 V379 4.711900e-03
## V285 V285 4.567783e-03
## V598 V598 4.556517e-03
## V128 V128 4.226361e-03
## V205 V205 3.955771e-03
## V398 V398 3.855799e-03
## V256 V256 3.771915e-03
## V597 V597 3.478222e-03
## V187 V187 3.447589e-03
## V365 V365 3.326292e-03
## V49   V49 3.247339e-03
## V193 V193 3.195125e-03
## V337 V337 3.166533e-03
## V75   V75 3.161259e-03
## V434 V434 3.157323e-03
## V168 V168 3.087353e-03
## V311 V311 2.912401e-03
## V42   V42 2.879719e-03
## V217 V217 2.795058e-03
## V53   V53 2.739797e-03
## V604 V604 2.577297e-03
## V524 V524 2.260960e-03
## V62   V62 2.245635e-03
## V612 V612 2.072066e-03
## V313 V313 2.032667e-03
## V615 V615 2.027161e-03
## V309 V309 1.890079e-03
## V599 V599 1.823798e-03
## V287 V287 1.722376e-03
## V495 V495 1.631176e-03
## V470 V470 1.610293e-03
## V275 V275 1.554798e-03
## V267 V267 1.506248e-03
## V224 V224 1.406474e-03
## V190 V190 1.382552e-03
## V194 V194 1.374033e-03
## V410 V410 1.369694e-03
## V81   V81 1.177104e-03
## V381 V381 1.128361e-03
## V101 V101 1.055284e-03
## V539 V539 1.041679e-03
## V124 V124 1.025054e-03
## V591 V591 9.467629e-04
## V250 V250 9.288417e-04
## V160 V160 9.261755e-04
## V161 V161 9.228309e-04
## V605 V605 8.950622e-04
## V487 V487 8.870477e-04
## V68   V68 7.954403e-04
## V95   V95 7.645321e-04
## V149 V149 7.486667e-04
## V607 V607 7.323758e-04
## V507 V507 7.078876e-04
## V475 V475 7.013623e-04
## V405 V405 6.826561e-04
## V442 V442 6.567797e-04
## V558 V558 6.109089e-04
## V281 V281 5.845989e-04
## V302 V302 5.837035e-04
## V611 V611 5.721329e-04
## V486 V486 5.141849e-04
## V5     V5 5.072539e-04
## V564 V564 4.832620e-04
## V16   V16 4.622963e-04
## V577 V577 4.282167e-04
## V258 V258 4.054258e-04
## V548 V548 3.887113e-04
## V512 V512 3.751435e-04
## V261 V261 3.461280e-04
## V513 V513 3.452419e-04
## V300 V300 3.093953e-04
## V572 V572 2.870035e-04
## V39   V39 2.369640e-04
## V566 V566 2.337654e-04
## V147 V147 2.034118e-04
## V90   V90 2.029214e-04
## V476 V476 1.981589e-04
## V47   V47 1.749835e-04
## V63   V63 1.531535e-04
## V396 V396 1.342653e-04
## V549 V549 1.155863e-04
## V292 V292 1.139129e-04
## V4     V4 1.112286e-04
## V69   V69 1.086181e-04
## V328 V328 8.082048e-05
## V529 V529 7.190861e-05
## V1     V1 0.000000e+00
## V2     V2 0.000000e+00
## V8     V8 0.000000e+00
## V9     V9 0.000000e+00
## V10   V10 0.000000e+00
## V11   V11 0.000000e+00
## V14   V14 0.000000e+00
## V17   V17 0.000000e+00
## V18   V18 0.000000e+00
## V19   V19 0.000000e+00
## V21   V21 0.000000e+00
## V22   V22 0.000000e+00
## V23   V23 0.000000e+00
## V24   V24 0.000000e+00
## V25   V25 0.000000e+00
## V26   V26 0.000000e+00
## V27   V27 0.000000e+00
## V28   V28 0.000000e+00
## V29   V29 0.000000e+00
## V30   V30 0.000000e+00
## V32   V32 0.000000e+00
## V33   V33 0.000000e+00
## V34   V34 0.000000e+00
## V36   V36 0.000000e+00
## V37   V37 0.000000e+00
## V38   V38 0.000000e+00
## V40   V40 0.000000e+00
## V43   V43 0.000000e+00
## V48   V48 0.000000e+00
## V51   V51 0.000000e+00
## V52   V52 0.000000e+00
## V54   V54 0.000000e+00
## V55   V55 0.000000e+00
## V56   V56 0.000000e+00
## V57   V57 0.000000e+00
## V58   V58 0.000000e+00
## V59   V59 0.000000e+00
## V60   V60 0.000000e+00
## V61   V61 0.000000e+00
## V65   V65 0.000000e+00
## V67   V67 0.000000e+00
## V72   V72 0.000000e+00
## V74   V74 0.000000e+00
## V79   V79 0.000000e+00
## V83   V83 0.000000e+00
## V84   V84 0.000000e+00
## V87   V87 0.000000e+00
## V88   V88 0.000000e+00
## V89   V89 0.000000e+00
## V91   V91 0.000000e+00
## V92   V92 0.000000e+00
## V97   V97 0.000000e+00
## V98   V98 0.000000e+00
## V103 V103 0.000000e+00
## V109 V109 0.000000e+00
## V110 V110 0.000000e+00
## V111 V111 0.000000e+00
## V114 V114 0.000000e+00
## V115 V115 0.000000e+00
## V121 V121 0.000000e+00
## V122 V122 0.000000e+00
## V125 V125 0.000000e+00
## V126 V126 0.000000e+00
## V130 V130 0.000000e+00
## V131 V131 0.000000e+00
## V136 V136 0.000000e+00
## V143 V143 0.000000e+00
## V145 V145 0.000000e+00
## V148 V148 0.000000e+00
## V151 V151 0.000000e+00
## V152 V152 0.000000e+00
## V153 V153 0.000000e+00
## V157 V157 0.000000e+00
## V158 V158 0.000000e+00
## V163 V163 0.000000e+00
## V164 V164 0.000000e+00
## V179 V179 0.000000e+00
## V181 V181 0.000000e+00
## V184 V184 0.000000e+00
## V185 V185 0.000000e+00
## V192 V192 0.000000e+00
## V195 V195 0.000000e+00
## V204 V204 0.000000e+00
## V211 V211 0.000000e+00
## V212 V212 0.000000e+00
## V216 V216 0.000000e+00
## V218 V218 0.000000e+00
## V221 V221 0.000000e+00
## V223 V223 0.000000e+00
## V228 V228 0.000000e+00
## V234 V234 0.000000e+00
## V238 V238 0.000000e+00
## V239 V239 0.000000e+00
## V240 V240 0.000000e+00
## V241 V241 0.000000e+00
## V242 V242 0.000000e+00
## V243 V243 0.000000e+00
## V244 V244 0.000000e+00
## V247 V247 0.000000e+00
## V248 V248 0.000000e+00
## V249 V249 0.000000e+00
## V251 V251 0.000000e+00
## V253 V253 0.000000e+00
## V254 V254 0.000000e+00
## V255 V255 0.000000e+00
## V259 V259 0.000000e+00
## V263 V263 0.000000e+00
## V265 V265 0.000000e+00
## V266 V266 0.000000e+00
## V272 V272 0.000000e+00
## V273 V273 0.000000e+00
## V274 V274 0.000000e+00
## V276 V276 0.000000e+00
## V277 V277 0.000000e+00
## V278 V278 0.000000e+00
## V279 V279 0.000000e+00
## V282 V282 0.000000e+00
## V283 V283 0.000000e+00
## V284 V284 0.000000e+00
## V286 V286 0.000000e+00
## V288 V288 0.000000e+00
## V289 V289 0.000000e+00
## V290 V290 0.000000e+00
## V305 V305 0.000000e+00
## V306 V306 0.000000e+00
## V307 V307 0.000000e+00
## V308 V308 0.000000e+00
## V312 V312 0.000000e+00
## V314 V314 0.000000e+00
## V315 V315 0.000000e+00
## V316 V316 0.000000e+00
## V320 V320 0.000000e+00
## V321 V321 0.000000e+00
## V322 V322 0.000000e+00
## V323 V323 0.000000e+00
## V324 V324 0.000000e+00
## V325 V325 0.000000e+00
## V326 V326 0.000000e+00
## V327 V327 0.000000e+00
## V329 V329 0.000000e+00
## V331 V331 0.000000e+00
## V332 V332 0.000000e+00
## V333 V333 0.000000e+00
## V334 V334 0.000000e+00
## V335 V335 0.000000e+00
## V336 V336 0.000000e+00
## V339 V339 0.000000e+00
## V341 V341 0.000000e+00
## V343 V343 0.000000e+00
## V344 V344 0.000000e+00
## V345 V345 0.000000e+00
## V346 V346 0.000000e+00
## V347 V347 0.000000e+00
## V348 V348 0.000000e+00
## V349 V349 0.000000e+00
## V350 V350 0.000000e+00
## V351 V351 0.000000e+00
## V352 V352 0.000000e+00
## V353 V353 0.000000e+00
## V354 V354 0.000000e+00
## V355 V355 0.000000e+00
## V356 V356 0.000000e+00
## V357 V357 0.000000e+00
## V358 V358 0.000000e+00
## V359 V359 0.000000e+00
## V360 V360 0.000000e+00
## V361 V361 0.000000e+00
## V362 V362 0.000000e+00
## V363 V363 0.000000e+00
## V366 V366 0.000000e+00
## V369 V369 0.000000e+00
## V370 V370 0.000000e+00
## V375 V375 0.000000e+00
## V377 V377 0.000000e+00
## V378 V378 0.000000e+00
## V380 V380 0.000000e+00
## V382 V382 0.000000e+00
## V383 V383 0.000000e+00
## V384 V384 0.000000e+00
## V385 V385 0.000000e+00
## V386 V386 0.000000e+00
## V387 V387 0.000000e+00
## V388 V388 0.000000e+00
## V389 V389 0.000000e+00
## V390 V390 0.000000e+00
## V391 V391 0.000000e+00
## V392 V392 0.000000e+00
## V393 V393 0.000000e+00
## V401 V401 0.000000e+00
## V402 V402 0.000000e+00
## V403 V403 0.000000e+00
## V406 V406 0.000000e+00
## V409 V409 0.000000e+00
## V411 V411 0.000000e+00
## V412 V412 0.000000e+00
## V413 V413 0.000000e+00
## V414 V414 0.000000e+00
## V415 V415 0.000000e+00
## V417 V417 0.000000e+00
## V418 V418 0.000000e+00
## V419 V419 0.000000e+00
## V420 V420 0.000000e+00
## V421 V421 0.000000e+00
## V422 V422 0.000000e+00
## V423 V423 0.000000e+00
## V424 V424 0.000000e+00
## V425 V425 0.000000e+00
## V426 V426 0.000000e+00
## V427 V427 0.000000e+00
## V428 V428 0.000000e+00
## V429 V429 0.000000e+00
## V430 V430 0.000000e+00
## V431 V431 0.000000e+00
## V435 V435 0.000000e+00
## V437 V437 0.000000e+00
## V438 V438 0.000000e+00
## V440 V440 0.000000e+00
## V443 V443 0.000000e+00
## V444 V444 0.000000e+00
## V445 V445 0.000000e+00
## V446 V446 0.000000e+00
## V447 V447 0.000000e+00
## V448 V448 0.000000e+00
## V449 V449 0.000000e+00
## V450 V450 0.000000e+00
## V451 V451 0.000000e+00
## V452 V452 0.000000e+00
## V453 V453 0.000000e+00
## V454 V454 0.000000e+00
## V455 V455 0.000000e+00
## V456 V456 0.000000e+00
## V457 V457 0.000000e+00
## V458 V458 0.000000e+00
## V459 V459 0.000000e+00
## V460 V460 0.000000e+00
## V461 V461 0.000000e+00
## V463 V463 0.000000e+00
## V464 V464 0.000000e+00
## V466 V466 0.000000e+00
## V467 V467 0.000000e+00
## V468 V468 0.000000e+00
## V469 V469 0.000000e+00
## V471 V471 0.000000e+00
## V472 V472 0.000000e+00
## V479 V479 0.000000e+00
## V480 V480 0.000000e+00
## V482 V482 0.000000e+00
## V483 V483 0.000000e+00
## V485 V485 0.000000e+00
## V488 V488 0.000000e+00
## V489 V489 0.000000e+00
## V490 V490 0.000000e+00
## V491 V491 0.000000e+00
## V492 V492 0.000000e+00
## V493 V493 0.000000e+00
## V494 V494 0.000000e+00
## V496 V496 0.000000e+00
## V497 V497 0.000000e+00
## V498 V498 0.000000e+00
## V499 V499 0.000000e+00
## V502 V502 0.000000e+00
## V503 V503 0.000000e+00
## V504 V504 0.000000e+00
## V505 V505 0.000000e+00
## V508 V508 0.000000e+00
## V509 V509 0.000000e+00
## V511 V511 0.000000e+00
## V514 V514 0.000000e+00
## V515 V515 0.000000e+00
## V517 V517 0.000000e+00
## V519 V519 0.000000e+00
## V522 V522 0.000000e+00
## V523 V523 0.000000e+00
## V525 V525 0.000000e+00
## V526 V526 0.000000e+00
## V527 V527 0.000000e+00
## V531 V531 0.000000e+00
## V532 V532 0.000000e+00
## V533 V533 0.000000e+00
## V534 V534 0.000000e+00
## V535 V535 0.000000e+00
## V537 V537 0.000000e+00
## V538 V538 0.000000e+00
## V540 V540 0.000000e+00
## V543 V543 0.000000e+00
## V545 V545 0.000000e+00
## V546 V546 0.000000e+00
## V547 V547 0.000000e+00
## V550 V550 0.000000e+00
## V551 V551 0.000000e+00
## V552 V552 0.000000e+00
## V553 V553 0.000000e+00
## V554 V554 0.000000e+00
## V555 V555 0.000000e+00
## V559 V559 0.000000e+00
## V560 V560 0.000000e+00
## V561 V561 0.000000e+00
## V563 V563 0.000000e+00
## V565 V565 0.000000e+00
## V567 V567 0.000000e+00
## V568 V568 0.000000e+00
## V569 V569 0.000000e+00
## V570 V570 0.000000e+00
## V571 V571 0.000000e+00
## V578 V578 0.000000e+00
## V579 V579 0.000000e+00
## V580 V580 0.000000e+00
## V581 V581 0.000000e+00
## V582 V582 0.000000e+00
## V583 V583 0.000000e+00
## V584 V584 0.000000e+00
## V585 V585 0.000000e+00
## V586 V586 0.000000e+00
## V587 V587 0.000000e+00
## V589 V589 0.000000e+00
## V592 V592 0.000000e+00
## V593 V593 0.000000e+00
## V595 V595 0.000000e+00
## V601 V601 0.000000e+00
## V602 V602 0.000000e+00
## V603 V603 0.000000e+00
## V606 V606 0.000000e+00
## V616 V616 0.000000e+00
# Score every row (training and test) using nTrees trees and store the
# type='response' predictions in a new modelGBM column; report() then
# thresholds these at 0.5 for accuracy and plots them.
d$modelGBM <- predict(modelGBM,newdata=d,type='response',
                      n.trees=nTrees)
report(d,'modelGBM',"GBM")
## [1] "***********"
## [1] "model modelGBM GBM"
## [1] "\t train accuracy modelGBM 1"
## [1] "\tmodel explained a 0.97 fraction of the variation on train"
## [1] "\t test accuracy modelGBM 0.89"
## [1] "\tmodel explained a 0.67 fraction of the variation on test"

## [1] "***********"
# residuals diagram
library(gbm)
library(ggplot2)

# Toy regression problem: 50 points on a sine wave.
# Note this replaces the data frame d used in the ISOLET example.
d <- data.frame(x = 1:50)
d[['y']] <- sin(0.2 * d[['x']])
ggplot(d, aes(x = x, y = y)) + geom_point()

# One gaussian boosted model with a large shrinkage; the plots below show its
# predictions when only the first 1, 2, 3 and finally all 100 trees are used.
modelGBM1 <- gbm(y ~ x, distribution = "gaussian", data = d,
                 n.trees = 100, shrinkage = 0.8)

# After one tree: prediction in blue, residual to the truth as a red segment.
d[['pred1']] <- predict(modelGBM1, newdata = d, n.trees = 1, type = 'response')
ggplot(data = d, aes(x = x, y = y)) +
  geom_line() + geom_point() +
  geom_point(aes(y = pred1), color = 'blue', size = 3) +
  geom_segment(aes(xend = x, yend = pred1), color = 'red')

# After two trees: previous prediction in green, current one in blue.
d[['pred2']] <- predict(modelGBM1, newdata = d, n.trees = 2, type = 'response')
ggplot(data = d, aes(x = x, y = y)) +
  geom_line() + geom_point() +
  geom_point(aes(y = pred1), color = 'green', size = 2) +
  geom_point(aes(y = pred2), color = 'blue', size = 3) +
  geom_segment(aes(xend = x, yend = pred2), color = 'red')

# After three trees.
d[['pred3']] <- predict(modelGBM1, newdata = d, n.trees = 3, type = 'response')
ggplot(data = d, aes(x = x, y = y)) +
  geom_line() + geom_point() +
  geom_point(aes(y = pred2), color = 'green', size = 2) +
  geom_point(aes(y = pred3), color = 'blue', size = 3) +
  geom_segment(aes(xend = x, yend = pred3), color = 'red')

# With all 100 trees.
d[['pred']] <- predict(modelGBM1, newdata = d, n.trees = 100, type = 'response')
ggplot(data = d, aes(x = x, y = y)) +
  geom_line() + geom_point() +
  geom_point(aes(y = pred), color = 'blue', size = 3) +
  geom_segment(aes(xend = x, yend = pred), color = 'red')