Map data records from block records (where each record may span more than one row) to row records (where each record is a single row).

blocks_to_rowrecs(
  tallTable,
  keyColumns,
  controlTable,
  ...,
  columnsToCopy = NULL,
  checkNames = TRUE,
  checkKeys = TRUE,
  strict = FALSE,
  controlTableKeys = colnames(controlTable)[[1]],
  tmp_name_source = wrapr::mk_tmp_name_source("bltrr"),
  temporary = TRUE,
  allow_rqdatatable = FALSE
)

# S3 method for default
blocks_to_rowrecs(
  tallTable,
  keyColumns,
  controlTable,
  ...,
  columnsToCopy = NULL,
  checkNames = TRUE,
  checkKeys = FALSE,
  strict = FALSE,
  controlTableKeys = colnames(controlTable)[[1]],
  tmp_name_source = wrapr::mk_tmp_name_source("btrd"),
  temporary = TRUE,
  allow_rqdatatable = FALSE
)

# S3 method for relop
blocks_to_rowrecs(
  tallTable,
  keyColumns,
  controlTable,
  ...,
  columnsToCopy = NULL,
  checkNames = TRUE,
  checkKeys = FALSE,
  strict = FALSE,
  controlTableKeys = colnames(controlTable)[[1]],
  tmp_name_source = wrapr::mk_tmp_name_source("bltrr"),
  temporary = TRUE,
  allow_rqdatatable = FALSE
)

Arguments

tallTable

data.frame containing data to be mapped (in-memory data.frame).

keyColumns

character vector of columns defining row groups

controlTable

table specifying mapping (local data frame)

...

force later arguments to be by name.

columnsToCopy

character, extra columns to copy.

checkNames

logical, if TRUE check names.

checkKeys

logical, if TRUE check keyColumns uniquely identify blocks (required).

strict

logical, if TRUE check control table name forms

controlTableKeys

character, which column names of the control table are considered to be keys.

tmp_name_source

a tempNameGenerator from wrapr::mk_tmp_name_source()

temporary

logical, if TRUE use temporary tables

allow_rqdatatable

logical, if TRUE allow rqdatatable shortcutting on simple conversions.

Value

wide table built by mapping key-grouped tallTable rows to one row per group

Details

The controlTable defines the names of each data element in the two notations: the notation of the tall table (which is row oriented) and the notation of the wide table (which is column oriented). controlTable[ , 1] (the group label) cross colnames(controlTable) (the column labels) are names of data cells in the long form. controlTable[ , 2:ncol(controlTable)] (column labels) are names of data cells in the wide form. To get behavior similar to tidyr::gather/spread one builds the control table by running an appropriate query over the data.

Some discussion and examples can be found here: https://winvector.github.io/FluidData/FluidData.html and here: https://github.com/WinVector/cdata.

See also

Examples

# pivot example
d <- data.frame(meas = c('AUC', 'R2'), val = c(0.6, 0.2))
cT <- build_pivot_control(d, columnToTakeKeysFrom= 'meas', columnToTakeValuesFrom= 'val')
blocks_to_rowrecs(d, keyColumns = NULL, controlTable = cT)
#>   AUC  R2
#> 1 0.6 0.2
d <- data.frame(meas = c('AUC', 'R2'), val = c(0.6, 0.2))
cT <- build_pivot_control(
  d,
  columnToTakeKeysFrom= 'meas',
  columnToTakeValuesFrom= 'val')
ops <- rquery::local_td(d) %.>%
  blocks_to_rowrecs(.,
                    keyColumns = NULL,
                    controlTable = cT)
cat(format(ops))
#> mk_td("d", c(
#>   "meas",
#>   "val")) %.>%
#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS SELECT MAX( CASE WHEN CAST(a."meas" AS VARCHAR) = 'AUC' THEN a."val" ELSE NULL END ) "AUC", MAX( CASE WHEN CAST(a."meas" AS VARCHAR) = 'R2' THEN a."val" ELSE NULL END ) "R2" FROM "IN" a )
if(requireNamespace("rqdatatable", quietly = TRUE)) {
  library("rqdatatable")
  d %.>%
    ops %.>%
    print(.)
}
#> Loading required package: rquery
#>   AUC  R2
#> 1 0.6 0.2
if(requireNamespace("RSQLite", quietly = TRUE)) {
  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
  DBI::dbWriteTable(db,
                    'd',
                    d,
                    overwrite = TRUE,
                    temporary = TRUE)
  db %.>%
    ops %.>%
    print(.)
  DBI::dbDisconnect(db)
}
#>   AUC  R2
#> 1 0.6 0.2