Logistic Example

We can work an example similar to the rquery example using a data.table back-end.

library("rqdatatable")

## Loading required package: wrapr

## Loading required package: rquery

# data example
dL <- wrapr::build_frame(
   "subjectID", "surveyCategory"     , "assessmentTotal" |
   1          , "withdrawal behavior", 5                 |
   1          , "positive re-framing", 2                 |
   2          , "withdrawal behavior", 3                 |
   2          , "positive re-framing", 4                 )

scale <- 0.237

# example rquery pipeline
rquery_pipeline <- local_td(dL) %.>%
  extend_nse(.,
             one = 1) %.>%
  extend_nse(.,
             probability =
               exp(assessmentTotal * scale)/
               sum(exp(assessmentTotal * scale)),
             count = sum(one),
             partitionby = 'subjectID') %.>%
  extend_nse(.,
             rank = cumsum(one),
             partitionby = 'subjectID',
             orderby = c('probability', 'surveyCategory')) %.>%
  extend_nse(.,
             isdiagnosis = rank == count,
             diagnosis = surveyCategory) %.>%
  select_rows_nse(., 
                  isdiagnosis == TRUE) %.>%
  select_columns(., 
                 c('subjectID', 'diagnosis', 'probability')) %.>%
  orderby(., 'subjectID')

Show expanded form of query tree.

cat(format(rquery_pipeline))

mk_td("dL", c(
  "subjectID",
  "surveyCategory",
  "assessmentTotal")) %.>%
 extend(.,
  one := 1) %.>%
 extend(.,
  probability := exp(assessmentTotal * 0.237) / sum(exp(assessmentTotal * 0.237)),
  count := sum(one),
  partitionby = c('subjectID'),
  orderby = c(),
  reverse = c()) %.>%
 extend(.,
  rank := cumsum(one),
  partitionby = c('subjectID'),
  orderby = c('probability', 'surveyCategory'),
  reverse = c()) %.>%
 extend(.,
  isdiagnosis := rank == count,
  diagnosis := surveyCategory) %.>%
 select_rows(.,
   isdiagnosis == TRUE) %.>%
 select_columns(., 
    c('subjectID', 'diagnosis', 'probability')) %.>%
 order_rows(.,
  c('subjectID'),
  reverse = c(),
  limit = NULL)

Execute the calculation.

ex_data_table(rquery_pipeline)

##   subjectID           diagnosis probability
## 1         1 withdrawal behavior   0.6706221
## 2         2 positive re-framing   0.5589742

John Mount

2022-01-22