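Build a relop pipeline fragment that keeps the top k rows (by a per-group row order/rank) within each partition. This is an example of building up a pre-prepared pipeline fragment from relop nodes.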

pick_top_k(
  source,
  ...,
  partitionby = NULL,
  orderby = NULL,
  reverse = NULL,
  k = 1L,
  order_expression = "row_number()",
  order_column = "row_number",
  keep_order_column = TRUE,
  env = parent.frame()
)

Arguments

source

relop tree or data.frame source.

...

force later arguments to bind by name.

partitionby

character, partitioning (window function) column names.

orderby

character, ordering (in window function) column names.

reverse

character, column names whose ordering (in the window function) is reversed.

k

integer, maximum number of rows to keep in each group.

order_expression

character, command to compute row-order/rank.

order_column

character, name of the column in which to write the per-group rank (no ties).

keep_order_column

logical, if TRUE retain the order column in the result.

env

environment to look for values in.

Examples

# by hand logistic regression example
scale <- 0.237
d <- mk_td("survey_table",
           c("subjectID", "surveyCategory", "assessmentTotal"))
optree <- d %.>%
  extend(., probability %:=% exp(assessmentTotal * scale)) %.>%
  normalize_cols(., "probability", partitionby = 'subjectID') %.>%
  pick_top_k(.,
             partitionby = 'subjectID',
             orderby = c('probability', 'surveyCategory'),
             reverse = c('probability', 'surveyCategory')) %.>%
  rename_columns(., 'diagnosis' %:=% 'surveyCategory') %.>%
  select_columns(., c('subjectID', 'diagnosis', 'probability')) %.>%
  orderby(., 'subjectID')
cat(format(optree))
#> mk_td("survey_table", c(
#> "subjectID",
#> "surveyCategory",
#> "assessmentTotal")) %.>%
#> extend(.,
#> probability := exp(assessmentTotal * 0.237)) %.>%
#> extend(.,
#> probability := probability / sum(probability),
#> partitionby = c('subjectID'),
#> orderby = c(),
#> reverse = c()) %.>%
#> extend(.,
#> row_number := row_number(),
#> partitionby = c('subjectID'),
#> orderby = c('probability', 'surveyCategory'),
#> reverse = c('probability', 'surveyCategory')) %.>%
#> select_rows(.,
#> row_number <= 1) %.>%
#> rename_columns(.,
#> c('diagnosis' = 'surveyCategory')) %.>%
#> select_columns(.,
#> c('subjectID', 'diagnosis', 'probability')) %.>%
#> order_rows(.,
#> c('subjectID'),
#> reverse = c(),
#> limit = NULL)