Build an optree pipeline that normalizes a set of columns so each column sums to one in each partition.

This function is an example of assembling a pre-prepared pipeline fragment from relop nodes.

normalize_cols(source, columns, ..., partitionby = NULL, env = parent.frame())

Arguments

source	relop tree or data.frame source.
columns	character, columns to normalize.
...	force later arguments to bind by name.
partitionby	names of the columns that define the partitions (as in a window function).
env	environment to look for values in.

Examples


# By-hand logistic regression example: score each row, normalize the
# scores within each subject, then keep the top-scoring row per subject.
scale <- 0.237
survey_td <- mk_td(
  "survey_table",
  c("subjectID", "surveyCategory", "assessmentTotal")
)

# un-normalized score for every row
scored <- survey_td %.>%
  extend(., probability %:=% exp(assessmentTotal * scale))

# rescale so probability sums to one within each subjectID
normalized <- scored %.>%
  normalize_cols(., "probability", partitionby = "subjectID")

# keep one row per subjectID: largest probability first,
# ties broken by surveyCategory
top_choice <- normalized %.>%
  pick_top_k(
    .,
    partitionby = "subjectID",
    orderby = c("probability", "surveyCategory"),
    reverse = "probability"
  )

# rename the category column, keep only the result columns, order by subject
optree <- top_choice %.>%
  rename_columns(., "diagnosis" %:=% "surveyCategory") %.>%
  select_columns(., c("subjectID", "diagnosis", "probability")) %.>%
  orderby(., "subjectID")

cat(format(optree))
#> mk_td("survey_table", c(
#>   "subjectID",
#>   "surveyCategory",
#>   "assessmentTotal")) %.>%
#>  extend(.,
#>   probability := exp(assessmentTotal * 0.237)) %.>%
#>  extend(.,
#>   probability := probability / sum(probability),
#>   partitionby = c('subjectID'),
#>   orderby = c(),
#>   reverse = c()) %.>%
#>  extend(.,
#>   row_number := row_number(),
#>   partitionby = c('subjectID'),
#>   orderby = c('probability', 'surveyCategory'),
#>   reverse = c('probability')) %.>%
#>  select_rows(.,
#>    row_number <= 1) %.>%
#>  rename_columns(.,
#>   c('diagnosis' = 'surveyCategory')) %.>%
#>  select_columns(., 
#>     c('subjectID', 'diagnosis', 'probability')) %.>%
#>  order_rows(.,
#>   c('subjectID'),
#>   reverse = c(),
#>   limit = NULL)