Make a general SQL node.
sql_node( source, exprs, ..., mods = NULL, orig_columns = TRUE, expand_braces = TRUE, translate_quotes = TRUE, env = parent.frame() )
source | source to work from. |
---|---|
exprs | SQL expressions |
... | force later arguments to bind by name |
mods | SQL modifiers (GROUP BY, ORDER BY, and so on) |
orig_columns | logical if TRUE select all original columns. |
expand_braces | logical if TRUE use col notation to ensure col is a column name. |
translate_quotes | logical if TRUE translate quotes to SQL choice (simple replacement, no escaping). |
env | environment to look to. |
sql node.
if (requireNamespace("DBI", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) { # example database connection my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") # load up example data d <- rq_copy_to( my_db, 'd', data.frame(v1 = c(1, 2, NA, 3), v2 = c(NA, "b", NA, "c"), v3 = c(NA, NA, 7, 8), stringsAsFactors = FALSE)) # look at table execute(my_db, d) # get list of columns vars <- column_names(d) print(vars) # build a NA/NULLs per-row counting expression. # names are "quoted" by wrapping them with as.name(). # constants can be quoted by an additional list wrapping. expr <- lapply(vars, function(vi) { list("+ (CASE WHEN (", as.name(vi), "IS NULL ) THEN 1.0 ELSE 0.0 END)") }) expr <- unlist(expr, recursive = FALSE) expr <- c(list(0.0), expr) cat(paste(unlist(expr), collapse = " ")) # instantiate the operator node op_tree <- d %.>% sql_node(., "num_missing" %:=% list(expr)) cat(format(op_tree)) # examine produced SQL sql <- to_sql(op_tree, my_db) cat(sql) # execute execute(my_db, op_tree) %.>% print(.) # whole process wrapped in convenience node op_tree2 <- d %.>% count_null_cols(., vars, "nnull") execute(my_db, op_tree2) %.>% print(.) # sql_node also allows marking variable in quoted expressions ops <- d %.>% sql_node(., qae(sqrt_v1 = sqrt(.[v1]))) execute(my_db, ops) %.>% print(.) # marking variables allows for error-checking of column names tryCatch({ ops <- d %.>% sql_node(., qae(sqrt_v1 = sqrt(.[v1_misspellled]))) }, error = function(e) {print(e)}) DBI::dbDisconnect(my_db) }#> [1] "v1" "v2" "v3" #> 0 + (CASE WHEN ( v1 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v2 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v3 IS NULL ) THEN 1.0 ELSE 0.0 END)mk_td("d", c( #> "v1", #> "v2", #> "v3")) %.>% #> sql_node(., #> num_missing %:=% 0 + (CASE WHEN ( v1 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v2 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v3 IS NULL ) THEN 1.0 ELSE 0.0 END), #> *=TRUE) #> SELECT #> `v3` AS `v3`, #> 0 + (CASE WHEN ( `v1` IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( `v2` IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( `v3` IS NULL ) THEN 1.0 ELSE 0.0 END) AS `num_missing`, #> `v1` AS `v1`, #> `v2` AS `v2` #> FROM ( #> SELECT #> `v1`, #> `v2`, #> `v3` #> FROM #> `d` #> ) tsql_45796301653839205628_0000000000 #> v3 num_missing v1 v2 #> 1 NA 2 1 <NA> #> 2 NA 1 2 b #> 3 7 2 NA <NA> #> 4 8 0 3 c #> v3 nnull v1 v2 #> 1 NA 2 1 <NA> #> 2 NA 1 2 b #> 3 7 2 NA <NA> #> 4 8 0 3 c #> v3 sqrt_v1 v1 v2 #> 1 NA 1.000000 1 <NA> #> 2 NA 1.414214 2 b #> 3 7 NA NA <NA> #> 4 8 1.732051 3 c #> <simpleError in sql_node.relop(., qae(sqrt_v1 = sqrt(.[v1_misspellled]))): rquery::sql_node.relop undefined columns: v1_misspellled>