R/normalize.R
normalize_cols.Rd
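Build a pipeline fragment that normalizes the given columns so that each one sums to one within each partition. This is an example of building up a pre-prepared pipeline fragment from relop nodes.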
normalize_cols(source, columns, ..., partitionby = NULL, env = parent.frame())
Argument | Description |
---|---|
source | relop tree or data.frame source. |
columns | character, columns to normalize. |
... | force later arguments to bind by name. |
partitionby | character, names of the columns that define the partitions (window-function groups) within which the columns are normalized; defaults to NULL. |
env | environment to look for values in. |
# by hand logistic regression example
scale <- 0.237
d <- mk_td("survey_table", c("subjectID", "surveyCategory", "assessmentTotal"))
optree <- d %.>%
  extend(., probability %:=% exp(assessmentTotal * scale)) %.>%
  normalize_cols(., "probability", partitionby = 'subjectID') %.>%
  pick_top_k(., partitionby = 'subjectID', orderby = c('probability', 'surveyCategory'), reverse = c('probability')) %.>%
  rename_columns(., 'diagnosis' %:=% 'surveyCategory') %.>%
  select_columns(., c('subjectID', 'diagnosis', 'probability')) %.>%
  orderby(., 'subjectID')
cat(format(optree))
#> mk_td("survey_table", c(
#>   "subjectID",
#>   "surveyCategory",
#>   "assessmentTotal")) %.>%
#>  extend(.,
#>   probability := exp(assessmentTotal * 0.237)) %.>%
#>  extend(.,
#>   probability := probability / sum(probability),
#>   partitionby = c('subjectID'),
#>   orderby = c(),
#>   reverse = c()) %.>%
#>  extend(.,
#>   row_number := row_number(),
#>   partitionby = c('subjectID'),
#>   orderby = c('probability', 'surveyCategory'),
#>   reverse = c('probability')) %.>%
#>  select_rows(.,
#>   row_number <= 1) %.>%
#>  rename_columns(.,
#>   c('diagnosis' = 'surveyCategory')) %.>%
#>  select_columns(.,
#>   c('subjectID', 'diagnosis', 'probability')) %.>%
#>  order_rows(.,
#>   c('subjectID'),
#>   reverse = c(),
#>   limit = NULL)