Set up some shared stuff

library('microbenchmark')
library('ggplot2')
set.seed(23525) # make run more repeatable

nCol <- 10
timeSeq <- seq.int(100,2000,100)

Function to populate a row of data.

mkRow <- function(nCol) {
  x <- as.list(rnorm(nCol))
  x[[1]] <- ifelse(x[[1]]>0,'pos','neg')
  names(x) <- paste('x',seq_len(nCol),sep='.')
  x
}

The common wrong-way to accumulate the rows of data into a single data frame.

mkFrameForLoop <- function(nRow,nCol) {
  d <- c()
  for(i in seq_len(nRow)) {
    ri <- mkRow(nCol)
    di <- data.frame(ri,
                     stringsAsFactors=FALSE)
    d <- rbind(d,di)
  }
  d
}
# build function to plot timings
# devtools::install_github("WinVector/WVPlots")
# library('WVPlots')
plotTimings <- function(timings) {
  timings$expr <- reorder(timings$expr,-timings$time,FUN=max)
  ggplot(data=timings,aes(x=nRow,y=time,color=expr)) +
    geom_point(alpha=0.8) + geom_smooth(alpha=0.8) 
  nmax <- max(timings$nRow)
  tsub <- timings[timings$nRow==nmax,]
  tsub$expr <- reorder(tsub$expr,tsub$time,FUN=median)
  list(
    ggplot(data=timings,aes(x=nRow,y=time,color=expr)) +
      geom_point(alpha=0.8) + geom_smooth(alpha=0.8),
    ggplot(data=timings,aes(x=nRow,y=time,color=expr)) +
      geom_point(alpha=0.8) + geom_smooth(alpha=0.8) +
      scale_y_log10(),
    WVPlots::ScatterBoxPlot(tsub,'expr','time',
                            title=paste('nRow = ',nmax)) +
      coord_flip()
  )
}

Timing showing the quadratic runtime.

timings <-  vector("list", length(timeSeq))
for(i in seq_len(length(timeSeq))) {
  nRow <- timeSeq[[i]]
  ti <- microbenchmark(
    mkFrameForLoop(nRow,nCol),
    times=10)
  ti <- data.frame(ti,
                   stringsAsFactors=FALSE)
  ti$nRow <- nRow
  ti$nCol <- nCol
  timings[[i]] <- ti
}
timings <- data.table::rbindlist(timings)

print(plotTimings(timings))
## [[1]]
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

## 
## [[2]]
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

## 
## [[3]]

A few roughly equivilent right ways to accumlate the rows.

# Simplest fix, collect the data in a list and
# grow the list.  Exploits the fact that R can mutate
# common objects when object visibility is limited.
mkFrameForList <- function(nRow,nCol,verbose=FALSE) {
  d <- as.list(seq_len(nRow)) # pre-alloc destination
  for(i in seq_len(nRow)) {
    ri <- mkRow(nCol)
    di <- data.frame(ri,
                     stringsAsFactors=FALSE)
    d[[i]] <- di
    if(verbose) {
      print(pryr::address(d))
    }
  }
  do.call(rbind,d)
}

# Cleanest fix- wrap procedure in a function and
# use lapply.
mkFrameList <- function(nRow,nCol) {
  d <- lapply(seq_len(nRow),function(i) {
    ri <- mkRow(nCol)
    data.frame(ri,
               stringsAsFactors=FALSE)
  })
  do.call(rbind,d)
}

Confirm value getting altered in place (effiency depends on interior columns also not chaning address, which is also the case).

mkFrameForList(10,5,TRUE)
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
## [1] "0x7f9e406c61f0"
##    x.1         x.2          x.3         x.4        x.5
## 1  neg -0.60301455  0.198773364  0.05982714 -1.2601829
## 2  pos  1.96341648 -0.564996823  0.37151297 -0.3083645
## 3  neg  0.57626331  0.028149072 -0.19453441 -3.0256527
## 4  pos  0.28755502  0.007095078 -0.05992568 -0.5024812
## 5  neg -0.16336357 -1.561558013 -0.15738111 -0.6285802
## 6  neg -1.37783842  0.830355860 -0.79339171  0.4223554
## 7  pos -0.52289654  1.663399105 -0.90919184 -1.3405057
## 8  pos -1.40676558 -1.422215092  0.37772800 -1.5121601
## 9  neg  0.14575393 -0.573764946  1.17134733 -0.4860252
## 10 pos  0.03247208 -0.756778828 -1.11823951  0.3115500

Get more timings and plots.

timingsPrev <- timings
timings <-  vector("list", length(timeSeq))
for(i in seq_len(length(timeSeq))) {
  nRow <- timeSeq[[i]]
  ti <- microbenchmark(
    mkFrameForList(nRow,nCol),
    mkFrameList(nRow,nCol),
    times=10)
  ti <- data.frame(ti,
                   stringsAsFactors=FALSE)
  ti$nRow <- nRow
  ti$nCol <- nCol
  timings[[i]] <- ti
}
timings <- data.table::rbindlist(timings)

print(plotTimings(timings))
## [[1]]
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.