Build an optree pipeline that selects up to the top k rows from each group in the given order.

This is an example of building a desired, pre-prepared pipeline fragment out of relop nodes.

pick_top_k(
  source,
  ...,
  partitionby = NULL,
  orderby = NULL,
  reverse = NULL,
  k = 1L,
  order_expression = "row_number()",
  order_column = "row_number",
  keep_order_column = TRUE,
  env = parent.frame()
)

Arguments

source	relop tree or data.frame source.
...	force later arguments to bind by name.
partitionby	character, partitioning (window function) column names.
orderby	character, ordering (in window function) column names.
reverse	character, reverse ordering (in window function) of these column names.
k	integer, number of rows to limit to in each group.
order_expression	character, command to compute row-order/rank.
order_column	character, column name to write per-group rank in (no ties).
keep_order_column	logical, if TRUE retain the order column in the result.
env	environment to look for values in.

Examples


# By-hand logistic regression scoring example: build a pipeline (optree) that
# reports, for each subject, the survey category with the top probability,
# then print the pipeline's text form.
# Scaling constant applied to assessmentTotal inside exp(); the printed
# pipeline shows its value (0.237) substituted into the expression.
scale <- 0.237
# Describe the source table by name and column names.
d <- mk_td("survey_table",
           c("subjectID", "surveyCategory", "assessmentTotal"))
optree <- d %.>%
  # Un-normalized score for each row.
  extend(.,
             probability %:=%
               exp(assessmentTotal * scale))  %.>%
  # Rescale probability within each subjectID; this prints as
  # probability / sum(probability) partitioned by subjectID.
  normalize_cols(.,
                 "probability",
                 partitionby = 'subjectID') %.>%
  # Keep at most one row per subjectID (k defaults to 1L). This prints as a
  # row_number() window column followed by a row_number <= 1 filter.
  # Both ordering columns are also listed in `reverse`, so the ordering is
  # reversed on each -- presumably highest probability first, with ties
  # broken by surveyCategory in reverse order; confirm against a backend.
  pick_top_k(.,
             partitionby = 'subjectID',
             orderby = c('probability', 'surveyCategory'),
             reverse = c('probability', 'surveyCategory')) %.>%
  # Present the winning surveyCategory under the name 'diagnosis'.
  rename_columns(., 'diagnosis' %:=% 'surveyCategory') %.>%
  # Keep only the reporting columns; this also drops the row_number column
  # that pick_top_k retains by default (keep_order_column = TRUE).
  select_columns(., c('subjectID',
                      'diagnosis',
                      'probability')) %.>%
  # Sort the final rows by subjectID (prints as order_rows).
  orderby(., 'subjectID')
# Print the text form of the pipeline; the expected output follows in the
# "#>" lines.
cat(format(optree))
#> mk_td("survey_table", c(
#>   "subjectID",
#>   "surveyCategory",
#>   "assessmentTotal")) %.>%
#>  extend(.,
#>   probability := exp(assessmentTotal * 0.237)) %.>%
#>  extend(.,
#>   probability := probability / sum(probability),
#>   partitionby = c('subjectID'),
#>   orderby = c(),
#>   reverse = c()) %.>%
#>  extend(.,
#>   row_number := row_number(),
#>   partitionby = c('subjectID'),
#>   orderby = c('probability', 'surveyCategory'),
#>   reverse = c('probability', 'surveyCategory')) %.>%
#>  select_rows(.,
#>    row_number <= 1) %.>%
#>  rename_columns(.,
#>   c('diagnosis' = 'surveyCategory')) %.>%
#>  select_columns(., 
#>     c('subjectID', 'diagnosis', 'probability')) %.>%
#>  order_rows(.,
#>   c('subjectID'),
#>   reverse = c(),
#>   limit = NULL)