library('biglm')
## Loading required package: DBI
datn <- function(n) {
d <- data.frame(x=runif(n))
d$y <- d$x>=runif(nrow(d))
d
}
d <- datn(100000)
formula <- 'y~x'
fitter <- function(formula,d) {
model <- bigglm(as.formula(formula),d,family=binomial(link='logit'))
list(predict=function(newd) {predict(model,newdata=newd,type='response')[,1]})
}
model <- fitter(formula,d)
print(head(model$predict(d)))
## 1 2 3 4 5 6
## 0.07929895 0.21976475 0.34121709 0.46146785 0.52353363 0.89666527
sizeTest1 <- function(d) {
model <- fitter(formula,d)
length(serialize(model, NULL))
}
for(n in c(1000,10000,100000)) {
print(sizeTest1(datn(n)))
}
## [1] 80704
## [1] 188704
## [1] 1268704
#' build a new funcion with a smaller environment
#' @param f input function
#' @param varaibles names we are allowing to be captured in the closere
#' @return new function with closure restricted to varaibles
#' @export
restrictEnvironment <- function(f,varList) {
oldEnv <- environment(f)
newEnv <- new.env(parent=parent.env(oldEnv))
for(v in varList) {
assign(v,get(v,envir=oldEnv),envir=newEnv)
}
environment(f) <- newEnv
f
}
fitter <- function(formula,d) {
model <- bigglm(as.formula(formula),d,family=binomial(link='logit'))
model$family$variance <- c()
model$family$dev.resids <- c()
model$family$aic <- c()
model$family$mu.eta <- c()
model$family$initialize <- c()
model$family$validmu <- c()
model$family$valideta <- c()
model$family$simulate <- c()
environment(model$terms) <- new.env(parent=globalenv())
list(predict=
restrictEnvironment(function(newd) {
predict(model,newdata=newd,type='response')[,1]
},
'model'))
}
model <- fitter(formula,d)
print(head(model$predict(d)))
## 1 2 3 4 5 6
## 0.07929895 0.21976475 0.34121709 0.46146785 0.52353363 0.89666527
for(n in c(1000,10000,100000)) {
print(sizeTest1(datn(n)))
}
## [1] 44146
## [1] 44146
## [1] 44146
# trying to observe address
f <- function() {
a <- list(b = 1)
print(pryr::address(a))
a$b <- 2
print(pryr::address(a))
a$b <- 3
print(pryr::address(a))
}
f()
## [1] "0x7f990cc16638"
## [1] "0x7f990cb976a8"
## [1] "0x7f990cb97ea8"
# timing
f1c <- function(n,verbose,shadow) {
v <- 1:n
vLast <- c()
if(shadow) {
vLast <- v
}
if(verbose) {
print(pryr::address(v))
}
for(i in 1:n) {
v[i] <- v[i]^2
if(shadow) {
vLast <- v
}
if(verbose) {
print(pryr::address(v))
}
}
c()
}
f1c(5,TRUE,FALSE)
## [1] "0x7f990cc49d58"
## [1] "0x7f990aed26e8"
## [1] "0x7f990aed2750"
## [1] "0x7f990aed27b8"
## [1] "0x7f990aed2820"
## [1] "0x7f990aed28f0"
## NULL
print(system.time(f1c(30000,FALSE,FALSE)))
## user system elapsed
## 0.064 0.003 0.082
print(system.time(f1c(30000,FALSE,TRUE)))
## user system elapsed
## 2.238 1.076 3.347