rquery re-maps a number of symbols during SQL translation.
During expression parsing the internal rquery function tokenize_call_for_SQL() implements the following re-mappings from R idioms to SQL notation.
## Loading required package: wrapr
library("wrapr") show_translation <- function(strings) { vapply(strings, function(si) { format(rquery::tokenize_for_SQL(parse(text = si, keep.source = FALSE)[[1]], colnames = NULL)$parsed_toks) }, character(1)) } mapping_table <- data.frame( example = c('!x', 'is.na(x)', 'ifelse(a, b, c)', 'a^b', 'a%%b', 'a==b', 'a&&b', 'a&b', 'a||b', 'a|b', 'pmin(a, b)', 'pmax(a, b)'), stringsAsFactors = FALSE) mapping_table$translation <- show_translation(mapping_table$example) knitr::kable(mapping_table)
| example | translation |
|---|---|
| !x | ( NOT ( x ) ) |
| is.na(x) | ( ( x ) IS NULL ) |
| ifelse(a, b, c) | ( CASE WHEN ( a ) THEN ( b ) WHEN NOT ( a ) THEN ( c ) ELSE NULL END ) |
| a^b | POWER ( a , b ) |
| a%%b | MOD ( a , b ) |
| a==b | a = b |
| a&&b | a AND b |
| a&b | a AND b |
| a||b | a OR b |
| a|b | a OR b |
| pmin(a, b) | ( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) <= ( b ) THEN ( a ) ELSE ( b ) END ) |
| pmax(a, b) | ( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) >= ( b ) THEN ( a ) ELSE ( b ) END ) |
Note: not all possible mappings are implemented. For example we currently do not re-map %in%, preferring the user to explicitly work with set_indicator() directly.
In addition to this the database connectors can specify additional re-mappings. This can be found by building a formal connector and inspecting the re-mappings.
have_RSQLite <- requireNamespace("RSQLite", quietly = TRUE)
raw_RSQLite_connection <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") RSQLite::initExtension(raw_RSQLite_connection) db <- rquery_db_info( connection = raw_RSQLite_connection, is_dbi = TRUE, connection_options = rq_connection_tests(raw_RSQLite_connection)) fn_name_map <- db$connection_options[[paste0("rquery.", rq_connection_name(db), ".", "fn_name_map")]] fn_name_map
## NULL
We see above that “mean” is re-mapped to “avg”.
In all cases we can see what re-mappings happen by examining a query.
d_local <- build_frame( "subjectID", "surveyCategory" , "assessmentTotal", "irrelevantCol1", "irrelevantCol2" | 1L , "withdrawal behavior", 5 , "irrel1" , "irrel2" | 1L , "positive re-framing", 2 , "irrel1" , "irrel2" | 3L , "withdrawal behavior", 3 , "irrel1" , "irrel2" | 3L , "positive re-framing", 2 , "irrel1" , "irrel2" | 3L , "other" , 1 , "irrel1" , "irrel2" ) table_handle <- rq_copy_to(db, 'd', d_local, temporary = TRUE, overwrite = TRUE) print(table_handle)
## [1] "mk_td(\"d\", c( \"subjectID\", \"surveyCategory\", \"assessmentTotal\", \"irrelevantCol1\", \"irrelevantCol2\"))"
## subjectID surveyCategory assessmentTotal irrelevantCol1 irrelevantCol2
## 1 1 withdrawal behavior 5 irrel1 irrel2
## 2 1 positive re-framing 2 irrel1 irrel2
## 3 3 withdrawal behavior 3 irrel1 irrel2
## 4 3 positive re-framing 2 irrel1 irrel2
## 5 3 other 1 irrel1 irrel2
ops <- table_handle %.>% project(., avg_total := avg(pmax(0, assessmentTotal)), groupby = "subjectID") cat(to_sql(ops, db))
## SELECT `subjectID`, avg ( ( CASE WHEN ( 0 ) IS NULL THEN ( `assessmentTotal` ) WHEN ( `assessmentTotal` ) IS NULL THEN ( 0 ) WHEN ( 0 ) >= ( `assessmentTotal` ) THEN ( 0 ) ELSE ( `assessmentTotal` ) END ) ) AS `avg_total` FROM (
## SELECT
## `subjectID`,
## `assessmentTotal`
## FROM
## `d`
## ) tsql_45150444179299580621_0000000000
## GROUP BY
## `subjectID`
| subjectID | avg_total |
|---|---|
| 1 | 3.5 |
| 3 | 2.0 |
The basic mappings are stored in database option structures, and depend on the database. For example MOD is re-mapped back to % for RSQLite.
rquery::rq_function_mappings(db) %.>% knitr::kable(.)
| fn_name | sql_mapping |
|---|---|
| as.Date | to_date ( .(3) , ‘YYYY-MM-DD’ ) |
| n | COUNT ( 1 ) |
| mean | AVG ( .(3) ) |
| cumsum | SUM ( .(3) ) |
| shift | LAG ( .(3) ) |
| MOD | ( .(3) % .(5) ) |
| rand | ABS ( ( RANDOM ( ) % 268435456 ) / 268435455.0 ) |
ops <- table_handle %.>% project(., groupby = "subjectID", n := 5, count := n(), mean := mean(assessmentTotal)) %.>% extend(., was_n := n) cat(to_sql(ops, db))
## SELECT
## `subjectID`,
## `n`,
## `count`,
## `mean`,
## `n` AS `was_n`
## FROM (
## SELECT `subjectID`, 5 AS `n`, COUNT ( 1 ) AS `count`, AVG ( `assessmentTotal` ) AS `mean` FROM (
## SELECT
## `subjectID`,
## `assessmentTotal`
## FROM
## `d`
## ) tsql_86045918521331246802_0000000000
## GROUP BY
## `subjectID`
## ) tsql_86045918521331246802_0000000001
| subjectID | n | count | mean | was_n |
|---|---|---|---|---|
| 1 | 5 | 2 | 3.5 | 5 |
| 3 | 5 | 3 | 2.0 | 5 |
Additional function re-mappings can be specified by user code. One such example is re-writing MOD as % for RSQLite.
ops <- table_handle %.>% extend(., z := 1 + subjectID %% 3) %.>% select_columns(., c("subjectID", "z")) cat(to_sql(ops, db))
## SELECT
## `subjectID`,
## `z`
## FROM (
## SELECT
## `subjectID`,
## 1 + ( `subjectID` % 3 ) AS `z`
## FROM (
## SELECT
## `subjectID`
## FROM
## `d`
## ) tsql_96503174598613074717_0000000000
## ) tsql_96503174598613074717_0000000001
| subjectID | z |
|---|---|
| 1 | 2 |
| 1 | 2 |
| 3 | 1 |
| 3 | 1 |
| 3 | 1 |
rqdatatable also supplies some re-mappings (described here). This can allow us to use a uniform notation for tasks such as random number generation to allow portable pipelines.
DBI::dbDisconnect(raw_RSQLite_connection)