Operators

cdata recommends an operator idiom to apply data transforms.

The idea is simple, yet powerful.

First let’s start with some data.


d <- wrapr::build_frame(
   "model_id"  , "measure", "value" |
     1         , "AUC"    , 0.7     |
     1         , "R2"     , 0.4     |
     2         , "AUC"    , 0.8     |
     2         , "R2"     , 0.5     )

knitr::kable(d)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5

In the above data we have two measurements each for two individuals (individuals identified by the “model_id” column). Using cdata’s rowrecs_to_blocks_spec() method we can capture a description of this record structure and transformation details.

library("cdata")
#> Loading required package: wrapr

transform <- rowrecs_to_blocks_spec(
  wrapr::qchar_frame(
    "measure", "value" |
    "AUC"    , AUC     |
    "R2"     , R2      ),
  recordKeys = "model_id")

print(transform)
#> {
#>  row_record <- wrapr::qchar_frame(
#>    "model_id"  , "AUC", "R2" |
#>      .         , AUC  , R2   )
#>  row_keys <- c('model_id')
#> 
#>  # becomes
#> 
#>  block_record <- wrapr::qchar_frame(
#>    "model_id"  , "measure", "value" |
#>      .         , "AUC"    , AUC     |
#>      .         , "R2"     , R2      )
#>  block_keys <- c('model_id', 'measure')
#> 
#>  # args: c(checkNames = TRUE, checkKeys = FALSE, strict = FALSE, allow_rqdatatable = FALSE)
#> }

Once we have this specification we can transform the data using operator notation.

We can collect the record blocks into rows by a “factor-out” (or aggregation/projection) step.

knitr::kable(d)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5


d2 <- d %//% t(transform)

knitr::kable(d2)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5


# (or using general pipe notation 
# includng the .() "execute immediately")
d %.>% 
  .(t(transform)) %.>% 
  knitr::kable(.)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5

We can expand record rows into blocks by a “multiplication” (or join) step.

knitr::kable(d2)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5


d3 <- d2 %**% transform

knitr::kable(d3)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5


# (or using general pipe notation)
d2 %.>% 
  transform %.>% 
  knitr::kable(.)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5

(%//% and %**% being two operators introduced by the cdata package.)

And the two specialized operators have an inverse/adjoint relation.

knitr::kable(d)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5


# identity
d4 <- d %//% t(transform) %**% transform

knitr::kable(d4)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5

We can also pipe into the spec (and into its adjoint) using the wrapr dot pipe operator.

# reverse or adjoint/transpose operation specification
t_record_spec <- t(transform)

d %.>% 
  t_record_spec %.>%
  knitr::kable(.)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5


# using dot-pipe's bquote style .() execute immediate notation
d %.>% 
  .(t(transform)) %.>%
  knitr::kable(.)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5


# identity
d %.>% 
  .(t(transform)) %.>% 
  transform %.>%
  knitr::kable(.)

model_id	measure	value
1	AUC	0.7
1	R2	0.4
2	AUC	0.8
2	R2	0.5

And, of course, the exact same functionality for database tables.

have_db <- requireNamespace("DBI", quietly = TRUE) &&
   requireNamespace("RSQLite", quietly = TRUE)

raw_connection <- DBI::dbConnect(RSQLite::SQLite(), 
                                 ":memory:")
RSQLite::initExtension(raw_connection)
db <- rquery::rquery_db_info(
  connection = raw_connection,
  is_dbi = TRUE,
  connection_options = rquery::rq_connection_tests(raw_connection))

d_td <- rquery::rq_copy_to(db, "d", d)

ops <- d_td %//% t(transform)
cat(format(ops))
#> mk_td("d", c(
#>   "model_id",
#>   "measure",
#>   "value")) %.>%
#>  non_sql_node(., CREATE TEMPORARY TABLE "OUT" AS  SELECT a."model_id" "model_id", MAX( CASE WHEN CAST(a."measure" AS VARCHAR) = 'AUC' THEN a."value" ELSE NULL END ) "AUC", MAX( CASE WHEN CAST(a."measure" AS VARCHAR) = 'R2' THEN a."value" ELSE NULL END ) "R2" FROM "IN" a GROUP BY a."model_id")

rquery::execute(db, ops) %.>%
  knitr::kable(.)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5


d_td %.>% 
  .(t(transform)) %.>%
  rquery::execute(db, .) %.>%
  knitr::kable(.)

model_id	AUC	R2
1	0.7	0.4
2	0.8	0.5

DBI::dbDisconnect(raw_connection)

John Mount, Nina Zumel

2021-06-11