if(){}else{}
.R/if_else_block.R
if_else_block.Rd
This device uses expression-ifelse(,,)
to simulate the
more powerful per-row block-if(){}else{}
. The difference is
expression-ifelse(,,)
can choose per-row what value to express,
whereas block-if(){}else{}
can choose per-row where to assign multiple
values. By simulation we mean: a sequence of quoted mutate expressions
are emitted that implement the transform. These expressions can then
be optimized into a minimal number of no-dependency
blocks by extend_se
for efficient execution.
The idea is the user can write legible code in this notation, and
the translation turns it into safe and efficient code suitable for
execution either on data.frame
s or at a big data scale using
RPostgreSQL
or sparklyr
.
if_else_block(testexpr, ..., thenexprs = NULL, elseexprs = NULL)
testexpr | character containing the test expression. |
---|---|
... | force later arguments to bind by name. |
thenexprs | named character then assignments (altering columns, not creating). |
elseexprs | named character else assignments (altering columns, not creating). |
sequence of statements for extend_se().
Note: ifebtest_*
is a reserved column name for this procedure.
if (requireNamespace("DBI", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) { # Example: clear one of a or b in any row where both are set. # Land random selections early to avoid SQLite bug. my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") d <- rq_copy_to( my_db, 'd', data.frame(i = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), a = c(0, 0, 1, 1, 1, 1, 1, 1, 1, 1), b = c(0, 1, 0, 1, 1, 1, 1, 1, 1, 1), r = runif(10), edited = 0), temporary=TRUE, overwrite=TRUE) program <- if_else_block( testexpr = qe((a+b)>1), thenexprs = c( if_else_block( testexpr = qe(r >= 0.5), thenexprs = qae(a %:=% 0), elseexprs = qae(b %:=% 0)), qae(edited %:=% 1))) print(program) optree <- extend_se(d, program) cat(format(optree)) sql <- to_sql(optree, my_db) cat(sql) print(DBI::dbGetQuery(my_db, sql)) # Why we need to land the random selection early # for SQLIte: q <- "SELECT r AS r1, r AS r2 FROM ( SELECT random() AS r FROM ( SELECT * from ( VALUES(1),(2) ) ) a ) b" print(DBI::dbGetQuery(my_db, q)) DBI::dbDisconnect(my_db) }#> ifebtest_2 #> "(a + b) > 1" #> ifebtest_1 #> "r >= 0.5" #> a #> "ifelse( ifebtest_2, ifelse( ifebtest_1, 0, a), a)" #> b #> "ifelse( ifebtest_2, ifelse( ! ifebtest_1, 0, b), b)" #> edited #> "ifelse( ifebtest_2, 1, edited)" #> mk_td("d", c( #> "i", #> "a", #> "b", #> "r", #> "edited")) %.>% #> extend(., #> ifebtest_2 := (a + b) > 1, #> ifebtest_1 := r >= 0.5) %.>% #> extend(., #> a := ifelse(ifebtest_2, ifelse(ifebtest_1, 0, a), a), #> b := ifelse(ifebtest_2, ifelse(!(ifebtest_1), 0, b), b), #> edited := ifelse(ifebtest_2, 1, edited)) #> SELECT #> `i`, #> `r`, #> `ifebtest_2`, #> `ifebtest_1`, #> ( CASE WHEN ( `ifebtest_2` ) THEN ( ( CASE WHEN ( `ifebtest_1` ) THEN ( 0 ) WHEN NOT ( `ifebtest_1` ) THEN ( `a` ) ELSE NULL END ) ) WHEN NOT ( `ifebtest_2` ) THEN ( `a` ) ELSE NULL END ) AS `a`, #> ( CASE WHEN ( `ifebtest_2` ) THEN ( ( CASE WHEN ( ( NOT ( `ifebtest_1` ) ) ) THEN ( 0 ) WHEN NOT ( ( NOT ( `ifebtest_1` ) ) ) THEN ( `b` ) ELSE NULL END ) ) WHEN NOT ( `ifebtest_2` ) THEN ( `b` ) ELSE NULL END ) AS `b`, #> ( CASE WHEN ( `ifebtest_2` ) THEN ( 1 ) WHEN NOT ( `ifebtest_2` ) THEN ( `edited` ) ELSE NULL END ) AS `edited` #> FROM ( #> SELECT #> `i`, #> `a`, #> `b`, #> `r`, #> `edited`, #> ( `a` + `b` ) > 1 AS `ifebtest_2`, #> `r` >= 0.5 AS `ifebtest_1` #> FROM ( #> SELECT #> `i`, #> `a`, #> `b`, #> `r`, #> `edited` #> FROM #> `d` #> ) tsql_12809831642421032705_0000000000 #> ) tsql_12809831642421032705_0000000001 #> i r ifebtest_2 ifebtest_1 a b edited #> 1 1 0.2477650 0 0 0 0 0 #> 2 2 0.5207486 0 1 0 1 0 #> 3 3 0.1329658 0 0 1 0 0 #> 4 4 0.8159506 1 1 0 1 1 #> 5 5 0.3078428 1 0 1 0 1 #> 6 6 0.3464580 1 0 1 0 1 #> 7 7 0.3243905 1 0 1 0 1 #> 8 8 0.8299951 1 1 0 1 1 #> 9 9 0.1697801 1 0 1 0 1 #> 10 10 0.2781942 1 0 1 0 1 #> r1 r2 #> 1 481061682262933798 8617695537736990981 #> 2 515069503754126100 -7127450241748595643