# Arithmetic on vectors is element-wise: (1 + 3, 2 + 5).
c(1, 2) + c(3, 5)
## [1] 4 7
# c() flattens nested vectors into a single flat vector.
c(1, c(2, 3))
## [1] 1 2 3
# Square root of a scalar.
sqrt(10)
## [1] 3.162
# Read the homes76 data set from the JSE archive into a data frame.
# The file is tab-separated with a header row; character columns are
# converted to factors on load.
d <- read.table(
  "http://www.amstat.org/publications/jse/datasets/homes76.dat.txt",
  header = TRUE,
  sep = "\t",
  # Argument spelled in full: the previous `stringsAsFactor` only worked
  # through R's partial argument matching.
  stringsAsFactors = TRUE
)
# Per-column summary of the freshly loaded data.
summary(d)
## id Y X1 X2
## Min. : 1.0 Min. :156 Min. :1.44 Min. : 1.00
## 1st Qu.:19.8 1st Qu.:243 1st Qu.:1.86 1st Qu.: 3.00
## Median :38.5 Median :276 Median :1.97 Median : 4.00
## Mean :38.5 Mean :286 Mean :1.97 Mean : 3.99
## 3rd Qu.:57.2 3rd Qu.:337 3rd Qu.:2.11 3rd Qu.: 5.00
## Max. :76.0 Max. :450 Max. :2.90 Max. :11.00
## X3 X4 X3X4 year
## Min. :1.00 Min. :2.00 Min. : 2.00 Min. :1905
## 1st Qu.:2.00 1st Qu.:3.00 1st Qu.: 6.00 1st Qu.:1958
## Median :2.00 Median :3.00 Median : 6.30 Median :1970
## Mean :2.21 Mean :3.45 Mean : 7.67 Mean :1969
## 3rd Qu.:3.00 3rd Qu.:4.00 3rd Qu.: 9.00 3rd Qu.:1980
## Max. :3.10 Max. :6.00 Max. :15.00 Max. :2005
## X5 X5sq X6 status D7
## Min. :-6.500 Min. : 0.00 Min. :0.00 act:25 Min. :0.000
## 1st Qu.:-1.225 1st Qu.: 0.25 1st Qu.:1.00 pen:13 1st Qu.:0.000
## Median :-0.050 Median : 1.22 Median :2.00 sld:38 Median :0.000
## Mean :-0.059 Mean : 5.45 Mean :1.57 Mean :0.329
## 3rd Qu.: 1.000 3rd Qu.: 9.00 3rd Qu.:2.00 3rd Qu.:1.000
## Max. : 3.500 Max. :42.25 Max. :3.00 Max. :1.000
## elem D8 D9 D10
## adams : 3 Min. :0.000 Min. :0.000 Min. :0.0000
## crest : 6 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## edge :26 Median :0.000 Median :0.000 Median :0.0000
## edison:12 Mean :0.158 Mean :0.184 Mean :0.0395
## harris:14 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000
## parker:15 Max. :1.000 Max. :1.000 Max. :1.0000
## D11 D12
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.000
## Mean :0.0789 Mean :0.197
## 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.000
# Show the column names exactly as they came from the file.
names(d)
## [1] "id" "Y" "X1" "X2" "X3" "X4" "X3X4"
## [8] "year" "X5" "X5sq" "X6" "status" "D7" "elem"
## [15] "D8" "D9" "D10" "D11" "D12"
# Replace the terse codes with descriptive labels. The new names are given
# in the same order as the existing columns (19 in total).
names(d) <- c(
  "id", "Price", "Size", "Lot", "Bath", "Bed", "BathBed",
  "Year", "Age", "Agesq", "Garage",
  "Status", "Active", "Elem",
  "Edison Elementary", "Harris Elementary", "Adams Elementary",
  "Crest Elementary", "Parker Elementary"
)
# Summarise again; only the column headers should differ from before.
summary(d)
## id Price Size Lot
## Min. : 1.0 Min. :156 Min. :1.44 Min. : 1.00
## 1st Qu.:19.8 1st Qu.:243 1st Qu.:1.86 1st Qu.: 3.00
## Median :38.5 Median :276 Median :1.97 Median : 4.00
## Mean :38.5 Mean :286 Mean :1.97 Mean : 3.99
## 3rd Qu.:57.2 3rd Qu.:337 3rd Qu.:2.11 3rd Qu.: 5.00
## Max. :76.0 Max. :450 Max. :2.90 Max. :11.00
## Bath Bed BathBed Year
## Min. :1.00 Min. :2.00 Min. : 2.00 Min. :1905
## 1st Qu.:2.00 1st Qu.:3.00 1st Qu.: 6.00 1st Qu.:1958
## Median :2.00 Median :3.00 Median : 6.30 Median :1970
## Mean :2.21 Mean :3.45 Mean : 7.67 Mean :1969
## 3rd Qu.:3.00 3rd Qu.:4.00 3rd Qu.: 9.00 3rd Qu.:1980
## Max. :3.10 Max. :6.00 Max. :15.00 Max. :2005
## Age Agesq Garage Status Active
## Min. :-6.500 Min. : 0.00 Min. :0.00 act:25 Min. :0.000
## 1st Qu.:-1.225 1st Qu.: 0.25 1st Qu.:1.00 pen:13 1st Qu.:0.000
## Median :-0.050 Median : 1.22 Median :2.00 sld:38 Median :0.000
## Mean :-0.059 Mean : 5.45 Mean :1.57 Mean :0.329
## 3rd Qu.: 1.000 3rd Qu.: 9.00 3rd Qu.:2.00 3rd Qu.:1.000
## Max. : 3.500 Max. :42.25 Max. :3.00 Max. :1.000
## Elem Edison Elementary Harris Elementary Adams Elementary
## adams : 3 Min. :0.000 Min. :0.000 Min. :0.0000
## crest : 6 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## edge :26 Median :0.000 Median :0.000 Median :0.0000
## edison:12 Mean :0.158 Mean :0.184 Mean :0.0395
## harris:14 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000
## parker:15 Max. :1.000 Max. :1.000 Max. :1.0000
## Crest Elementary Parker Elementary
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.000
## Mean :0.0789 Mean :0.197
## 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.000
# Compact structural overview of d: one line per column showing its type
# and the first few values.
str(d)
## 'data.frame': 76 obs. of 19 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Price : num 388 450 386 350 156 ...
## $ Size : num 2.18 2.05 2.11 1.44 1.8 ...
## $ Lot : int 4 5 5 6 1 5 4 4 4 5 ...
## $ Bath : num 3 3 2 1 2 2 1.1 2 2.1 2.1 ...
## $ Bed : int 4 4 4 2 4 3 4 4 4 3 ...
## $ BathBed : num 12 12 8 2 8 6 4.4 8 8.4 6.3 ...
## $ Year : int 1940 1957 1955 1956 1994 1940 1958 1961 1965 1968 ...
## $ Age : num -3 -1.3 -1.5 -1.4 2.4 -3 -1.2 -0.9 -0.5 -0.2 ...
## $ Agesq : num 9 1.69 2.25 1.96 5.76 9 1.44 0.81 0.25 0.04 ...
## $ Garage : int 0 2 2 1 1 1 1 2 2 2 ...
## $ Status : Factor w/ 3 levels "act","pen","sld": 3 3 3 1 3 3 1 3 1 3 ...
## $ Active : int 0 0 0 1 0 0 1 0 1 0 ...
## $ Elem : Factor w/ 6 levels "adams","crest",..: 4 4 4 1 1 1 6 6 6 6 ...
## $ Edison Elementary: int 1 1 1 0 0 0 0 0 0 0 ...
## $ Harris Elementary: int 0 0 0 0 0 0 0 0 0 0 ...
## $ Adams Elementary : int 0 0 0 1 1 1 0 0 0 0 ...
## $ Crest Elementary : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Parker Elementary: int 0 0 0 0 0 0 1 1 1 1 ...
# Class attribute of d.
class(d)
## [1] "data.frame"
# Underlying storage type of d: a data frame is stored as a list.
typeof(d)
## [1] "list"
# help(saveRDS)
# Serialize d to a single-object .rds file (path is relative to the working
# directory).
saveRDS(d, file = "tmp.rds")
# Remove every object from the current environment so that the reload below
# demonstrably comes from disk.
# NOTE(review): this also deletes any unrelated objects in the workspace.
rm(list = ls())
ls()
## character(0)
# Restore the data frame from the file; `<-` matches the assignment style
# used where d is first created.
d <- readRDS(file = "tmp.rds")
# First rows of the restored data frame.
head(d)
## id Price Size Lot Bath Bed BathBed Year Age Agesq Garage Status Active
## 1 1 388.0 2.180 4 3 4 12 1940 -3.0 9.00 0 sld 0
## 2 2 450.0 2.054 5 3 4 12 1957 -1.3 1.69 2 sld 0
## 3 3 386.0 2.112 5 2 4 8 1955 -1.5 2.25 2 sld 0
## 4 4 350.0 1.442 6 1 2 2 1956 -1.4 1.96 1 act 1
## 5 5 155.5 1.800 1 2 4 8 1994 2.4 5.76 1 sld 0
## 6 6 220.0 1.965 5 2 3 6 1940 -3.0 9.00 1 sld 0
## Elem Edison Elementary Harris Elementary Adams Elementary
## 1 edison 1 0 0
## 2 edison 1 0 0
## 3 edison 1 0 0
## 4 adams 0 0 1
## 5 adams 0 0 1
## 6 adams 0 0 1
## Crest Elementary Parker Elementary
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
# help(readRDS)
library(ggplot2)
# Scatter plot of Size against Price with a smoothed trend line on top;
# geom_smooth() is left on its defaults.
# NOTE(review): Price (the file's original `Y` column) is on the x-axis here —
# confirm this orientation is intended.
ggplot(data = d, mapping = aes(x = Price, y = Size)) +
  geom_point() +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.