diff --git a/docs/404.html b/docs/404.html index 4040fc6..504b67e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -67,7 +67,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 80e6b39..e9dc91a 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -67,7 +67,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/articles/GroupedSampling.html b/docs/articles/GroupedSampling.html index 257e93d..1fff54b 100644 --- a/docs/articles/GroupedSampling.html +++ b/docs/articles/GroupedSampling.html @@ -31,7 +31,7 @@ rqdatatable - 1.2.3 + 1.2.4 @@ -91,7 +91,7 @@

Grouped Sampling

John Mount

-

2019-10-23

+

2019-11-12

Source: vignettes/GroupedSampling.Rmd @@ -126,12 +126,12 @@

2019-10-23

samp <- ex_data_table(sample_ops) head(samp)
##    x y z    id one        ord row_number
-## 1: a a a   909   1 0.04891075          1
-## 2: a a a  6960   1 0.08585692          2
-## 3: a a a 27931   1 0.20268071          3
-## 4: a a a 13108   1 0.25831462          4
-## 5: a a a 67683   1 0.66286079          5
-## 6: a a b  4168   1 0.01941657          1
+## 1: a a a 99342 1 0.29592013 1 +## 2: a a a 41989 1 0.45891485 2 +## 3: a a a 12254 1 0.99895914 3 +## 4: a a b 30135 1 0.09357431 1 +## 5: a a b 20897 1 0.22383060 2 +## 6: a a b 59731 1 0.54500068 3

And the database version is very similar (on databases with window functions).

The main issue is landing the random order without having to translate the R runif(sum(one)) code into database operations.

library("rquery")
@@ -167,13 +167,13 @@ 

2019-10-23

samp <- ex_data_table(sample_ops) head(samp)
-
##    x y z    id          ord row_number
-## 1: a a a  6960 0.0623182002          1
-## 2: a a a 58463 0.3283458794          2
-## 3: a a a 27931 0.5018074675          3
-## 4: a a a 67683 0.8144518612          4
-## 5: a a a 13108 0.9170139052          5
-## 6: a a b 39807 0.0001303996          1
+
##    x y z    id        ord row_number
+## 1: a a a 12254 0.47605028          1
+## 2: a a a 41989 0.61569890          2
+## 3: a a a 99342 0.87579154          3
+## 4: a a b 25056 0.02373051          1
+## 5: a a b 59731 0.03065273          2
+## 6: a a b 30135 0.25620100          3

The translations available are listed in the package variable rqdatatable:::data_table_extend_fns.

str(rqdatatable:::data_table_extend_fns)
## List of 6
diff --git a/docs/articles/R_mapping.html b/docs/articles/R_mapping.html
index 21ab94f..d5bbdb4 100644
--- a/docs/articles/R_mapping.html
+++ b/docs/articles/R_mapping.html
@@ -31,7 +31,7 @@
       
       
         rqdatatable
-        1.2.3
+        1.2.4
       
     
 
@@ -91,7 +91,7 @@
       

R mapping

John Mount

-

2019-10-23

+

2019-11-12

Source: vignettes/R_mapping.Rmd diff --git a/docs/articles/index.html b/docs/articles/index.html index 001b5ee..40f6451 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -67,7 +67,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/articles/logisticexample.html b/docs/articles/logisticexample.html index 24853d0..77d0c05 100644 --- a/docs/articles/logisticexample.html +++ b/docs/articles/logisticexample.html @@ -31,7 +31,7 @@ rqdatatable - 1.2.3 + 1.2.4 @@ -91,7 +91,7 @@

Logistic Example

John Mount

-

2019-10-23

+

2019-11-12

Source: vignettes/logisticexample.Rmd @@ -104,37 +104,36 @@

2019-10-23

library("rqdatatable")
## Loading required package: rquery
library("data.table")
-
## Warning: package 'data.table' was built under R version 3.5.2
- -
scale <- 0.237
-
-# example rquery pipeline
-rquery_pipeline <- local_td(dL) %.>%
-  extend_nse(.,
-             one = 1) %.>%
-  extend_nse(.,
-             probability =
-               exp(assessmentTotal * scale)/
-               sum(exp(assessmentTotal * scale)),
-             count = sum(one),
-             rank = rank(probability, surveyCategory),
-             partitionby = 'subjectID') %.>%
-  extend_nse(.,
-             isdiagnosis = rank == count,
-             diagnosis = surveyCategory) %.>%
-  select_rows_nse(., 
-                  isdiagnosis == TRUE) %.>%
-  select_columns(., 
-                 c('subjectID', 'diagnosis', 'probability')) %.>%
-  orderby(., 'subjectID')
+ +
scale <- 0.237
+
+# example rquery pipeline
+rquery_pipeline <- local_td(dL) %.>%
+  extend_nse(.,
+             one = 1) %.>%
+  extend_nse(.,
+             probability =
+               exp(assessmentTotal * scale)/
+               sum(exp(assessmentTotal * scale)),
+             count = sum(one),
+             rank = rank(probability, surveyCategory),
+             partitionby = 'subjectID') %.>%
+  extend_nse(.,
+             isdiagnosis = rank == count,
+             diagnosis = surveyCategory) %.>%
+  select_rows_nse(., 
+                  isdiagnosis == TRUE) %.>%
+  select_columns(., 
+                 c('subjectID', 'diagnosis', 'probability')) %.>%
+  orderby(., 'subjectID')

Show expanded form of query tree.

-
cat(format(rquery_pipeline))
+
cat(format(rquery_pipeline))
mk_td("dL", c(
   "subjectID",
   "surveyCategory",
@@ -157,14 +156,14 @@ 

2019-10-23

diagnosis := surveyCategory) %.>% select_rows(., isdiagnosis == TRUE) %.>% - select_columns(., c( - "subjectID", "diagnosis", "probability")) %.>% + select_columns(., + c('subjectID', 'diagnosis', 'probability')) %.>% order_rows(., c('subjectID'), reverse = c(), limit = NULL)

Execute the calculation.

-
ex_data_table(rquery_pipeline)
+
ex_data_table(rquery_pipeline)
##    subjectID           diagnosis probability
 ## 1:         1 withdrawal behavior   0.6706221
 ## 2:         2 positive re-framing   0.5589742
diff --git a/docs/authors.html b/docs/authors.html index 8d7b42d..3406a00 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -67,7 +67,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/index.html b/docs/index.html index c860db9..3c91cbd 100644 --- a/docs/index.html +++ b/docs/index.html @@ -32,7 +32,7 @@ rqdatatable - 1.2.3 + 1.2.4 @@ -94,7 +94,7 @@

A Python version of rquery/rqdatatable is under initial development as data_algebra.

For example, scoring a logistic regression model (which requires grouping, ordering, and ranking) is organized as follows. For more on this example please see “Let’s Have Some Sympathy For The Part-time R User”.

library("rqdatatable")
- +

And execute it using data.table.

ex_data_table(rquery_pipeline)
- +

One can also apply the pipeline to new tables.

- +

Initial bench-marking of rqdatatable is very favorable (notes here).

Note rqdatatable has an “immediate mode” which allows direct application of pipeline stages without pre-assembling the pipeline. “Immediate mode” is a convenience for ad-hoc analyses, and has some negative performance impact, so we encourage users to build pipelines for most work. Some notes on the issue can be found here.

rqdatatable implements the rquery grammar in the style of a “Turing or Cook reduction” (implementing the result in terms of multiple oracle calls to the related system). For some related work on translating dplyr to data.table in the style of “Karp reductions” (implementing the result as a translation into a single problem in the related system) please see table.express and also dtplyr (though the 0.0.3 and prior versions of dtplyr have severe performance issues due to excess table copying, ref, ref).

diff --git a/docs/news/index.html b/docs/news/index.html index e48b4f0..0111520 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -67,7 +67,7 @@ rqdatatable - 1.2.3 + 1.2.4 @@ -128,9 +128,19 @@

Changelog

Source: NEWS.md +
+

+rqdatatable 1.2.4 2019/11/12 Unreleased +

+
    +
  • Remove unused methods import.
  • +
  • More tests on drop_columns.
  • +
  • Catch degenerate project.
  • +
+

-rqdatatable 1.2.3 2019/10/23 Unreleased +rqdatatable 1.2.3 2019/10/23 2019-10-24

  • Correct how we remove columns to use proper data.table notation.
  • @@ -309,6 +319,7 @@

    Contents

    diff --git a/docs/reference/ex_data_table.html b/docs/reference/ex_data_table.html index fcbff45..d17b3fd 100644 --- a/docs/reference/ex_data_table.html +++ b/docs/reference/ex_data_table.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4

@@ -201,8 +201,8 @@

Examp #> "x", #> "y", #> "z")) %.>% -#> select_columns(., c( -#> "x", "y")) %.>% +#> select_columns(., +#> c('x', 'y')) %.>% #> select_rows(., #> x < 2 & y < 30)
ex_data_table(optree)
#> x y #> 1: 1 20
diff --git a/docs/reference/ex_data_table.relop_drop_columns.html b/docs/reference/ex_data_table.relop_drop_columns.html index 60836ef..c78501b 100644 --- a/docs/reference/ex_data_table.relop_drop_columns.html +++ b/docs/reference/ex_data_table.relop_drop_columns.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4
@@ -174,8 +174,8 @@

Examp dL <- data.frame(x = 1, y = 2, z = 3) rquery_pipeline <- local_td(dL) %.>% drop_columns(., "y") -ex_data_table(rquery_pipeline)
#> x y z -#> 1: 1 2 3
+ex_data_table(rquery_pipeline)
#> x z +#> 1: 1 3

diff --git a/docs/reference/ex_data_table.relop_natural_join.html b/docs/reference/ex_data_table.relop_natural_join.html index 203da12..69b8f05 100644 --- a/docs/reference/ex_data_table.relop_natural_join.html +++ b/docs/reference/ex_data_table.relop_natural_join.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_non_sql.html b/docs/reference/ex_data_table.relop_non_sql.html index 405cb81..3c9d525 100644 --- a/docs/reference/ex_data_table.relop_non_sql.html +++ b/docs/reference/ex_data_table.relop_non_sql.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_null_replace.html b/docs/reference/ex_data_table.relop_null_replace.html index f822a24..6d2b408 100644 --- a/docs/reference/ex_data_table.relop_null_replace.html +++ b/docs/reference/ex_data_table.relop_null_replace.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_order_expr.html b/docs/reference/ex_data_table.relop_order_expr.html index 7503875..fdfb160 100644 --- a/docs/reference/ex_data_table.relop_order_expr.html +++ b/docs/reference/ex_data_table.relop_order_expr.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_orderby.html b/docs/reference/ex_data_table.relop_orderby.html index 3c8ab48..ef0de0e 100644 --- a/docs/reference/ex_data_table.relop_orderby.html +++ b/docs/reference/ex_data_table.relop_orderby.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_project.html b/docs/reference/ex_data_table.relop_project.html index c0291a3..ba52480 100644 --- a/docs/reference/ex_data_table.relop_project.html +++ b/docs/reference/ex_data_table.relop_project.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_rename_columns.html b/docs/reference/ex_data_table.relop_rename_columns.html index 8664eeb..fbc9c45 100644 --- 
a/docs/reference/ex_data_table.relop_rename_columns.html +++ b/docs/reference/ex_data_table.relop_rename_columns.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_select_columns.html b/docs/reference/ex_data_table.relop_select_columns.html index e0e36b3..132befb 100644 --- a/docs/reference/ex_data_table.relop_select_columns.html +++ b/docs/reference/ex_data_table.relop_select_columns.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_select_rows.html b/docs/reference/ex_data_table.relop_select_rows.html index 62d30e6..7562e96 100644 --- a/docs/reference/ex_data_table.relop_select_rows.html +++ b/docs/reference/ex_data_table.relop_select_rows.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_set_indicator.html b/docs/reference/ex_data_table.relop_set_indicator.html index 7e97617..25ca4c2 100644 --- a/docs/reference/ex_data_table.relop_set_indicator.html +++ b/docs/reference/ex_data_table.relop_set_indicator.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_sql.html b/docs/reference/ex_data_table.relop_sql.html index 36ef18b..ba34fb4 100644 --- a/docs/reference/ex_data_table.relop_sql.html +++ b/docs/reference/ex_data_table.relop_sql.html @@ -71,7 +71,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_table_source.html b/docs/reference/ex_data_table.relop_table_source.html index aa94dcd..2e28636 100644 --- a/docs/reference/ex_data_table.relop_table_source.html +++ b/docs/reference/ex_data_table.relop_table_source.html @@ -72,7 +72,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_theta_join.html b/docs/reference/ex_data_table.relop_theta_join.html index 965c928..614aea0 100644 --- a/docs/reference/ex_data_table.relop_theta_join.html +++ b/docs/reference/ex_data_table.relop_theta_join.html @@ -69,7 +69,7 @@ 
rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table.relop_unionall.html b/docs/reference/ex_data_table.relop_unionall.html index e64d579..545a58c 100644 --- a/docs/reference/ex_data_table.relop_unionall.html +++ b/docs/reference/ex_data_table.relop_unionall.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/ex_data_table_parallel.html b/docs/reference/ex_data_table_parallel.html index 451a1cc..c94dfd8 100644 --- a/docs/reference/ex_data_table_parallel.html +++ b/docs/reference/ex_data_table_parallel.html @@ -72,7 +72,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/index.html b/docs/reference/index.html index 70e2af3..49468ab 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -67,7 +67,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/layout_to_blocks_data_table.html b/docs/reference/layout_to_blocks_data_table.html index ce0c582..a4cf343 100644 --- a/docs/reference/layout_to_blocks_data_table.html +++ b/docs/reference/layout_to_blocks_data_table.html @@ -71,7 +71,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/layout_to_rowrecs_data_table.html b/docs/reference/layout_to_rowrecs_data_table.html index 239a3d5..1fc91ca 100644 --- a/docs/reference/layout_to_rowrecs_data_table.html +++ b/docs/reference/layout_to_rowrecs_data_table.html @@ -71,7 +71,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/make_dt_lookup_by_column.html b/docs/reference/make_dt_lookup_by_column.html index 2ad8103..2e37941 100644 --- a/docs/reference/make_dt_lookup_by_column.html +++ b/docs/reference/make_dt_lookup_by_column.html @@ -70,7 +70,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/rbindlist_data_table.html b/docs/reference/rbindlist_data_table.html index 7049070..0155c35 100644 --- a/docs/reference/rbindlist_data_table.html +++ b/docs/reference/rbindlist_data_table.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/reexports.html 
b/docs/reference/reexports.html index 84b83b6..958d661 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -78,7 +78,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/rq_df_funciton_node.html b/docs/reference/rq_df_funciton_node.html index 3af3aa9..e966b26 100644 --- a/docs/reference/rq_df_funciton_node.html +++ b/docs/reference/rq_df_funciton_node.html @@ -69,7 +69,7 @@ rqdatatable - 1.2.3 + 1.2.4 @@ -221,17 +221,17 @@

Examp #> "y", #> "group")) %.>% #> non_sql_node(., y~x grouped by group)
-ex_data_table(rquery_pipeline)
#> Estimate Std. Error t value Pr(>|t|) Variable group -#> 1: 0.090716766 0.07125705 1.2730918 0.20447817 (Intercept) a -#> 2: -0.044853006 0.06701987 -0.6692494 0.50411599 x a -#> 3: 0.063457904 0.06802746 0.9328278 0.35208658 (Intercept) b -#> 4: 0.058692951 0.06868752 0.8544922 0.39390284 x b -#> 5: 0.129306526 0.07003884 1.8462116 0.06650366 (Intercept) c -#> 6: 0.012482453 0.07217097 0.1729567 0.86287979 x c -#> 7: 0.031991890 0.06543806 0.4888881 0.62544100 (Intercept) d -#> 8: -0.008130064 0.06308408 -0.1288766 0.89758099 x d -#> 9: -0.012349987 0.06804600 -0.1814947 0.85615030 (Intercept) e -#> 10: -0.008883978 0.07675926 -0.1157382 0.90796806 x e
+ex_data_table(rquery_pipeline)
#> Estimate Std. Error t value Pr(>|t|) Variable group +#> 1: 0.05921097 0.06246165 0.9479572 0.34421552 (Intercept) a +#> 2: -0.02301646 0.06093971 -0.3776924 0.70603174 x a +#> 3: 0.09793586 0.06666844 1.4689988 0.14335117 (Intercept) b +#> 4: 0.05703537 0.06963630 0.8190466 0.41370179 x b +#> 5: -0.05184909 0.07556010 -0.6861967 0.49348193 (Intercept) c +#> 6: 0.05554476 0.08019680 0.6926057 0.48945965 x c +#> 7: 0.15331654 0.07004124 2.1889469 0.02985964 (Intercept) d +#> 8: 0.02056881 0.06921107 0.2971896 0.76665700 x d +#> 9: 0.02250647 0.06919627 0.3252556 0.74531773 (Intercept) e +#> 10: -0.08785792 0.06864886 -1.2798162 0.20204920 x e
@@ -217,17 +217,17 @@

Examp #> "y", #> "group")) %.>% #> non_sql_node(., y~x grouped by group grouped by group)
-ex_data_table(rquery_pipeline)
#> Estimate Std_Error t_value p_value Variable group -#> 1: 0.090716766 0.07125705 1.2730918 0.20447817 (Intercept) a -#> 2: -0.044853006 0.06701987 -0.6692494 0.50411599 x a -#> 3: 0.063457904 0.06802746 0.9328278 0.35208658 (Intercept) b -#> 4: 0.058692951 0.06868752 0.8544922 0.39390284 x b -#> 5: 0.129306526 0.07003884 1.8462116 0.06650366 (Intercept) c -#> 6: 0.012482453 0.07217097 0.1729567 0.86287979 x c -#> 7: 0.031991890 0.06543806 0.4888881 0.62544100 (Intercept) d -#> 8: -0.008130064 0.06308408 -0.1288766 0.89758099 x d -#> 9: -0.012349987 0.06804600 -0.1814947 0.85615030 (Intercept) e -#> 10: -0.008883978 0.07675926 -0.1157382 0.90796806 x e
+ex_data_table(rquery_pipeline)
#> Estimate Std_Error t_value p_value Variable group +#> 1: 0.05921097 0.06246165 0.9479572 0.34421552 (Intercept) a +#> 2: -0.02301646 0.06093971 -0.3776924 0.70603174 x a +#> 3: 0.09793586 0.06666844 1.4689988 0.14335117 (Intercept) b +#> 4: 0.05703537 0.06963630 0.8190466 0.41370179 x b +#> 5: -0.05184909 0.07556010 -0.6861967 0.49348193 (Intercept) c +#> 6: 0.05554476 0.08019680 0.6926057 0.48945965 x c +#> 7: 0.15331654 0.07004124 2.1889469 0.02985964 (Intercept) d +#> 8: 0.02056881 0.06921107 0.2971896 0.76665700 x d +#> 9: 0.02250647 0.06919627 0.3252556 0.74531773 (Intercept) e +#> 10: -0.08785792 0.06864886 -1.2798162 0.20204920 x e
diff --git a/docs/reference/rqdatatable.html b/docs/reference/rqdatatable.html index a3b5a76..2521e96 100644 --- a/docs/reference/rqdatatable.html +++ b/docs/reference/rqdatatable.html @@ -70,7 +70,7 @@ rqdatatable - 1.2.3 + 1.2.4 diff --git a/docs/reference/run_rqdatatable_tests.html b/docs/reference/run_rqdatatable_tests.html index 3e29426..f078a35 100644 --- a/docs/reference/run_rqdatatable_tests.html +++ b/docs/reference/run_rqdatatable_tests.html @@ -72,7 +72,7 @@ rqdatatable - 1.2.3 + 1.2.4