``````library('microbenchmark')
library('pryr')
library('plyr')
library('ggplot2')

# standard naive "non-R" way to square items in a vector
f1 <- function() {
v <- 1:n
for(i in 1:n) {
v[i] <- v[i]^2
}
v
}``````

Show address of value referred by by v is stable (indicating in-place alteration of vector, likely due to limited visibility of the value referred to).

``````# same naive loop with pryr:address() inspection
# doesn't trigger visibile value address changes, but very slow
f1p <- function(verbose=FALSE) {
v <- 1:n
if(verbose) {
}
for(i in 1:n) {
v[i] <- v[i]^2
if(verbose) {
}
}
v
}

# same naive loop with .Internal(inspect(v))
f1b <- function(verbose=FALSE) {
v <- 1:n
if(verbose) {
.Internal(inspect(v))
}
for(i in 1:n) {
v[i] <- v[i]^2
if(verbose) {
.Internal(inspect(v))
}
}
v
}

# even worse case: vLast shares reference to value referred to by v,
# causing address to change (evidence of more object copying)
f1c <- function(verbose=FALSE) {
v <- 1:n
vLast <- v
if(verbose) {
.Internal(inspect(v))
}
for(i in 1:n) {
v[i] <- v[i]^2
vLast <- v
if(verbose) {
.Internal(inspect(v))
}
}
v
}

n <- 5
f1b(TRUE)``````
``````## @7fc6fe8afd70 13 INTSXP g0c3 [NAM(1)] (len=5, tl=0) 1,2,3,4,5
## @7fc6fdf4bb50 14 REALSXP g0c4 [NAM(1)] (len=5, tl=0) 1,2,3,4,5
## @7fc6fdf4bb50 14 REALSXP g0c4 [NAM(1)] (len=5, tl=0) 1,4,3,4,5
## @7fc6fdf4bb50 14 REALSXP g0c4 [NAM(1)] (len=5, tl=0) 1,4,9,4,5
## @7fc6fdf4bb50 14 REALSXP g0c4 [NAM(1)] (len=5, tl=0) 1,4,9,16,5
## @7fc6fdf4bb50 14 REALSXP g0c4 [NAM(1)] (len=5, tl=0) 1,4,9,16,25``````
``## [1]  1  4  9 16 25``
``````# address not changing
n <- 5
f1p(TRUE)``````
``````## [1] "0x7fc6fe133db0"
## [1] "0x7fc6fdf2c070"
## [1] "0x7fc6fdf2c070"
## [1] "0x7fc6fdf2c070"
## [1] "0x7fc6fdf2c070"
## [1] "0x7fc6fdf2c070"``````
``## [1]  1  4  9 16 25``
``````# address changing
f1c(TRUE)``````
``````## @7fc6fe15db28 13 INTSXP g0c3 [NAM(2)] (len=5, tl=0) 1,2,3,4,5
## @7fc6fdf29950 14 REALSXP g0c4 [NAM(2)] (len=5, tl=0) 1,2,3,4,5
## @7fc6fdf29a20 14 REALSXP g0c4 [NAM(2)] (len=5, tl=0) 1,4,3,4,5
## @7fc6fdf29af0 14 REALSXP g0c4 [NAM(2)] (len=5, tl=0) 1,4,9,4,5
## @7fc6fdf25870 14 REALSXP g0c4 [NAM(2)] (len=5, tl=0) 1,4,9,16,5
## @7fc6fdf259a8 14 REALSXP g0c4 [NAM(2)] (len=5, tl=0) 1,4,9,16,25``````
``## [1]  1  4  9 16 25``
``````# pryr::address(v) seems uniquely slower than the other f1X() functions
microbenchmark(f1(),f1b(),f1p(),f1c())``````
``````## Unit: microseconds
##   expr    min      lq     mean  median       uq     max neval
##   f1()  4.587  5.6930  7.10846  6.1925   6.9915  29.247   100
##  f1b()  5.021  6.2815  7.56307  6.7675   7.5230  17.550   100
##  f1p() 78.611 85.6475 96.31977 93.2305 101.2380 231.254   100
##  f1c()  5.643  6.7685  8.62212  7.4920   8.2985  24.596   100``````

Obviously using built-ins is way faster than any interpretted loop or apply.

``````# vector apply way of squaring items
f2 <- function(v) {
v <- 1:n
v <- vapply(v,function(x) {x*x},numeric(1))
v
}

# true vector method of squaring items
f3 <- function() {
v <- 1:n
v <- v^2
v
}

n <- 1000
microbenchmark(f1(),f2(),f3())``````
``````## Unit: microseconds
##  expr     min      lq       mean   median        uq      max neval
##  f1() 769.904 849.662 1098.78994 898.6425 1236.5335 2825.948   100
##  f2() 544.032 624.841  782.76640 665.3085  832.3115 2191.192   100
##  f3()   3.574   4.852    8.38659   6.9350   10.0080   34.839   100``````
``````powseq <- function(min,max,nstep) {
mul <- (max/min)^(1/nstep)
sort(unique(c(min,round(min*mul^(1:nstep)),max)))
}

m <- c()
for(n in powseq(10,10000,40)) {
mi <- as.data.frame(microbenchmark(f1(),f2(),f3()))
mi\$n <- n
m <- rbind(m,mi)
# si <- ddply(mi,'expr',summarize,time=mean(time))
}

qSummary = function(ycol){
data.frame(y=median(ycol),
ymin=quantile(ycol, 0.25),
ymax=quantile(ycol, 0.75))
}
ggplot(data=m,aes(x=n,y=time,color=expr)) +
stat_summary(fun.y=mean,geom='point') +
stat_summary(fun.data=qSummary,geom='errorbar') +
geom_smooth()``````
``## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.``

``````# confirm f3() moving
ggplot(data=m,aes(x=n,y=time,color=expr)) +
stat_summary(fun.y=mean,geom='point') +
stat_summary(fun.data=qSummary,geom='errorbar') +
geom_smooth() +
scale_y_log10() + scale_x_log10()``````
``## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.``

``````for(expr in sort(unique(m\$expr))) {
print(expr)
print(summary(lm(time~I(n*n)+n,data=m[m\$expr==expr,])))
}``````
``````## [1] "f1()"
##
## Call:
## lm(formula = time ~ I(n * n) + n, data = m[m\$expr == expr, ])
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1685817   -75159   -10926    -5603 38743412
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.943e+03  2.489e+04   0.319   0.7496
## I(n * n)    4.998e-03  2.888e-03   1.730   0.0836 .
## n           9.017e+02  2.416e+01  37.322   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1209000 on 4097 degrees of freedom
## Multiple R-squared:  0.7842, Adjusted R-squared:  0.7841
## F-statistic:  7446 on 2 and 4097 DF,  p-value: < 2.2e-16
##
## [1] "f2()"
##
## Call:
## lm(formula = time ~ I(n * n) + n, data = m[m\$expr == expr, ])
##
## Residuals:
##      Min       1Q   Median       3Q      Max
##  -949245   -65851    -3894      292 34943120
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  3.087e+03  1.286e+04   0.240    0.810
## I(n * n)    -1.785e-03  1.493e-03  -1.196    0.232
## n            6.491e+02  1.249e+01  51.972   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 625000 on 4097 degrees of freedom
## Multiple R-squared:  0.8609, Adjusted R-squared:  0.8608
## F-statistic: 1.268e+04 on 2 and 4097 DF,  p-value: < 2.2e-16
##
## [1] "f3()"
##
## Call:
## lm(formula = time ~ I(n * n) + n, data = m[m\$expr == expr, ])
##
## Residuals:
##    Min     1Q Median     3Q    Max
##  -8017   -570   -179     34 675287
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  8.995e+02  2.297e+02   3.917 9.13e-05 ***
## I(n * n)    -9.493e-05  2.665e-05  -3.561 0.000373 ***
## n            4.550e+00  2.230e-01  20.404  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11160 on 4097 degrees of freedom
## Multiple R-squared:  0.4108, Adjusted R-squared:  0.4106
## F-statistic:  1429 on 2 and 4097 DF,  p-value: < 2.2e-16``````
``````m <- c()
for(n in powseq(10,1000,40)) {
mi <- as.data.frame(microbenchmark(f1(),f1p(),f2(),f3(),f1c()))
mi\$n <- n
m <- rbind(m,mi)
# si <- ddply(mi,'expr',summarize,time=mean(time))
}

qSummary = function(ycol){
data.frame(y=median(ycol),
ymin=quantile(ycol, 0.25),
ymax=quantile(ycol, 0.75))
}
ggplot(data=m,aes(x=n,y=time,color=expr)) +
stat_summary(fun.y=mean,geom='point') +
stat_summary(fun.data=qSummary,geom='errorbar') +
geom_smooth()``````
``## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.``