library('glmnet')
## Loading required package: Matrix
## Loaded glmnet 1.9-8
library('ggplot2')

d <- read.table('car.data',header=FALSE,sep=',',stringsAsFactors=TRUE)
yColumn <- 'rating'
vars <- c('buying', 'maint', 'doors', 'persons', 
                 'lug_boot', 'safety')
colnames(d) <- c(vars,yColumn)
summary(d)
##    buying      maint       doors     persons     lug_boot    safety   
##  high :432   high :432   2    :432   2   :576   big  :576   high:576  
##  low  :432   low  :432   3    :432   4   :576   med  :576   low :576  
##  med  :432   med  :432   4    :432   more:576   small:576   med :576  
##  vhigh:432   vhigh:432   5more:432                                    
##    rating    
##  acc  : 384  
##  good :  69  
##  unacc:1210  
##  vgood:  65
# View(d)
set.seed(53525)
d$isTest <- runif(nrow(d))<0.2
d$want <- d[,yColumn]!='unacc'

f <- paste('want',paste(vars,collapse=' + '),sep=' ~ ')
# try glmnet on original data
tryCatch(
   model1 <- cv.glmnet(x=as.matrix(d[!d$isTest,vars,drop=FALSE]),
                    y=d[!d$isTest,'want',drop=TRUE],
                    family='binomial'),
   
   warning = function(w) {w},
   error = function(e) {e})
## <simpleWarning in lognet(x, is.sparse, ix, jx, y, weights, offset, alpha, nobs,     nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd,     intr, vnames, maxit, kopt, family): NAs introduced by coercion>
# errors out due to non-numeric features
# try to fix it the wrong way- convert to numeric
# (either through ordinal levels or through "hashed features")
dN <- d
for(v in vars) {
  dN[,v] <- as.numeric(d[,v])
}

model2 <- cv.glmnet(x=as.matrix(dN[!dN$isTest,vars,drop=FALSE]),
                    y=dN[!dN$isTest,'want',drop=TRUE],
                    family='binomial')
d$predN <- predict(model2,
                    newx=as.matrix(dN[,vars,drop=FALSE]),
                    type='response')[,1]
ggplot(data=d[d$isTest,]) + 
  geom_density(aes(x=predN,color=want))

print(table(truth=d[d$isTest,'want'],decision=d[d$isTest,'predN']>0.5))
##        decision
## truth   FALSE TRUE
##   FALSE   208   14
##   TRUE     77   17
print(table(truth=d[d$isTest,'want'],decision=d[d$isTest,'predN']>median(d[d$isTest,'predN'])))
##        decision
## truth   FALSE TRUE
##   FALSE   151   71
##   TRUE     17   77
# fix it the right way: model.matrix()
dM <- model.matrix(as.formula(f),d)
print(head(dM))
##   (Intercept) buyinglow buyingmed buyingvhigh maintlow maintmed maintvhigh
## 1           1         0         0           1        0        0          1
## 2           1         0         0           1        0        0          1
## 3           1         0         0           1        0        0          1
## 4           1         0         0           1        0        0          1
## 5           1         0         0           1        0        0          1
## 6           1         0         0           1        0        0          1
##   doors3 doors4 doors5more persons4 personsmore lug_bootmed lug_bootsmall
## 1      0      0          0        0           0           0             1
## 2      0      0          0        0           0           0             1
## 3      0      0          0        0           0           0             1
## 4      0      0          0        0           0           1             0
## 5      0      0          0        0           0           1             0
## 6      0      0          0        0           0           1             0
##   safetylow safetymed
## 1         1         0
## 2         0         1
## 3         0         0
## 4         1         0
## 5         0         1
## 6         0         0
model3 <- cv.glmnet(x=dM[!d$isTest,,drop=FALSE],
                    y=d[!d$isTest,'want',drop=TRUE],
                    family='binomial')
d$predC <- predict(model3,
                    newx=dM,
                    type='response')[,1]
ggplot(data=d[d$isTest,]) + 
  geom_density(aes(x=predC,color=want))

print(table(truth=d[d$isTest,'want'],decision=d[d$isTest,'predC']>0.5))
##        decision
## truth   FALSE TRUE
##   FALSE   211   11
##   TRUE     11   83