diff --git a/docs/404.html b/docs/404.html index 4040fc6..504b67e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -67,7 +67,7 @@
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 80e6b39..e9dc91a 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -67,7 +67,7 @@ diff --git a/docs/articles/GroupedSampling.html b/docs/articles/GroupedSampling.html index 257e93d..1fff54b 100644 --- a/docs/articles/GroupedSampling.html +++ b/docs/articles/GroupedSampling.html @@ -31,7 +31,7 @@ @@ -91,7 +91,7 @@vignettes/GroupedSampling.Rmd
GroupedSampling.Rmd
## x y z id one ord row_number
-## 1: a a a 909 1 0.04891075 1
-## 2: a a a 6960 1 0.08585692 2
-## 3: a a a 27931 1 0.20268071 3
-## 4: a a a 13108 1 0.25831462 4
-## 5: a a a 67683 1 0.66286079 5
-## 6: a a b 4168 1 0.01941657 1
+## 1: a a a 99342 1 0.29592013 1
+## 2: a a a 41989 1 0.45891485 2
+## 3: a a a 12254 1 0.99895914 3
+## 4: a a b 30135 1 0.09357431 1
+## 5: a a b 20897 1 0.22383060 2
+## 6: a a b 59731 1 0.54500068 3
And the database version is very similar (on databases with window functions).
The main issue is landing the random order without having to translate the R
runif(sum(one))
code into database operations.
library("rquery")
@@ -167,13 +167,13 @@ 2019-10-23
samp <- ex_data_table(sample_ops)
head(samp)
-## x y z id ord row_number
-## 1: a a a 6960 0.0623182002 1
-## 2: a a a 58463 0.3283458794 2
-## 3: a a a 27931 0.5018074675 3
-## 4: a a a 67683 0.8144518612 4
-## 5: a a a 13108 0.9170139052 5
-## 6: a a b 39807 0.0001303996 1
+## x y z id ord row_number
+## 1: a a a 12254 0.47605028 1
+## 2: a a a 41989 0.61569890 2
+## 3: a a a 99342 0.87579154 3
+## 4: a a b 25056 0.02373051 1
+## 5: a a b 59731 0.03065273 2
+## 6: a a b 30135 0.25620100 3
The translations available are listed in the package variable rqdatatable:::data_table_extend_fns
.
## List of 6
diff --git a/docs/articles/R_mapping.html b/docs/articles/R_mapping.html
index 21ab94f..d5bbdb4 100644
--- a/docs/articles/R_mapping.html
+++ b/docs/articles/R_mapping.html
@@ -31,7 +31,7 @@
R mapping
John Mount
- 2019-10-23
+ 2019-11-12
Source: vignettes/R_mapping.Rmd
R_mapping.Rmd
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 001b5ee..40f6451 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -67,7 +67,7 @@
diff --git a/docs/articles/logisticexample.html b/docs/articles/logisticexample.html
index 24853d0..77d0c05 100644
--- a/docs/articles/logisticexample.html
+++ b/docs/articles/logisticexample.html
@@ -31,7 +31,7 @@
@@ -91,7 +91,7 @@
Logistic Example
John Mount
- 2019-10-23
+ 2019-11-12
Source: vignettes/logisticexample.Rmd
logisticexample.Rmd
@@ -104,37 +104,36 @@ 2019-10-23
## Loading required package: rquery
-## Warning: package 'data.table' was built under R version 3.5.2
-# data example
-dL <- wrapr::build_frame(
- "subjectID", "surveyCategory" , "assessmentTotal" |
- 1 , "withdrawal behavior", 5 |
- 1 , "positive re-framing", 2 |
- 2 , "withdrawal behavior", 3 |
- 2 , "positive re-framing", 4 )
-scale <- 0.237
-
-# example rquery pipeline
-rquery_pipeline <- local_td(dL) %.>%
- extend_nse(.,
- one = 1) %.>%
- extend_nse(.,
- probability =
- exp(assessmentTotal * scale)/
- sum(exp(assessmentTotal * scale)),
- count = sum(one),
- rank = rank(probability, surveyCategory),
- partitionby = 'subjectID') %.>%
- extend_nse(.,
- isdiagnosis = rank == count,
- diagnosis = surveyCategory) %.>%
- select_rows_nse(.,
- isdiagnosis == TRUE) %.>%
- select_columns(.,
- c('subjectID', 'diagnosis', 'probability')) %.>%
- orderby(., 'subjectID')
+# data example
+dL <- wrapr::build_frame(
+ "subjectID", "surveyCategory" , "assessmentTotal" |
+ 1 , "withdrawal behavior", 5 |
+ 1 , "positive re-framing", 2 |
+ 2 , "withdrawal behavior", 3 |
+ 2 , "positive re-framing", 4 )
+scale <- 0.237
+
+# example rquery pipeline
+rquery_pipeline <- local_td(dL) %.>%
+ extend_nse(.,
+ one = 1) %.>%
+ extend_nse(.,
+ probability =
+ exp(assessmentTotal * scale)/
+ sum(exp(assessmentTotal * scale)),
+ count = sum(one),
+ rank = rank(probability, surveyCategory),
+ partitionby = 'subjectID') %.>%
+ extend_nse(.,
+ isdiagnosis = rank == count,
+ diagnosis = surveyCategory) %.>%
+ select_rows_nse(.,
+ isdiagnosis == TRUE) %.>%
+ select_columns(.,
+ c('subjectID', 'diagnosis', 'probability')) %.>%
+ orderby(., 'subjectID')
Show expanded form of query tree.
-
+
mk_td("dL", c(
"subjectID",
"surveyCategory",
@@ -157,14 +156,14 @@ 2019-10-23
diagnosis := surveyCategory) %.>%
select_rows(.,
isdiagnosis == TRUE) %.>%
- select_columns(., c(
- "subjectID", "diagnosis", "probability")) %.>%
+ select_columns(.,
+ c('subjectID', 'diagnosis', 'probability')) %.>%
order_rows(.,
c('subjectID'),
reverse = c(),
limit = NULL)
Execute the calculation.
-
+
## subjectID diagnosis probability
## 1: 1 withdrawal behavior 0.6706221
## 2: 2 positive re-framing 0.5589742
diff --git a/docs/authors.html b/docs/authors.html
index 8d7b42d..3406a00 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -67,7 +67,7 @@
diff --git a/docs/index.html b/docs/index.html
index c860db9..3c91cbd 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -32,7 +32,7 @@
@@ -94,7 +94,7 @@
A Python
version of rquery
/rqdatatable
is under initial development as data_algebra
.
For example, scoring a logistic regression model (which requires grouping, ordering, and ranking) is organized as follows. For more on this example please see “Let’s Have Some Sympathy For The Part-time R User”.
-
+
# data example
dL <- build_frame(
"subjectID", "surveyCategory" , "assessmentTotal" |
@@ -152,17 +152,17 @@
limit = NULL)
And execute it using data.table
.
-## subjectID diagnosis probability
-## 1: 1 withdrawal behavior 0.6706221
-## 2: 2 positive re-framing 0.5589742
+## subjectID diagnosis probability
+## 1: 1 withdrawal behavior 0.6706221
+## 2: 2 positive re-framing 0.5589742
One can also apply the pipeline to new tables.
build_frame(
"subjectID", "surveyCategory" , "assessmentTotal" |
7 , "withdrawal behavior", 5 |
7 , "positive re-framing", 20 ) %.>%
rquery_pipeline
-
+
Initial bench-marking of rqdatatable
is very favorable (notes here).
Note rqdatatable
has an “immediate mode” which allows direct application of pipeline stages without pre-assembling the pipeline. “Immediate mode” is a convenience for ad-hoc analyses, and has some negative performance impact, so we encourage users to build pipelines for most work. Some notes on the issue can be found here.
rqdatatable
implements the rquery
grammar in the style of a “Turing or Cook reduction” (implementing the result in terms of multiple oracle calls to the related system). For some related work on translating dplyr
to data.table
in the style of “Karp reductions” (implementing the result as a translation into a single problem in the related system) please see table.express
and also dtplyr
(though the 0.0.3
and prior versions of dtplyr
have severe performance issues due to excess table copying, ref, ref).
diff --git a/docs/news/index.html b/docs/news/index.html
index e48b4f0..0111520 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -67,7 +67,7 @@
@@ -128,9 +128,19 @@ Changelog
Source: NEWS.md
+
+
+rqdatatable 1.2.4 2019/11/12 Unreleased
+
+
+- Remove unused methods import.
+- More tests on drop_columns.
+- Catch degenerate project.
+
+
-rqdatatable 1.2.3 2019/10/23 Unreleased
+rqdatatable 1.2.3 2019/10/23 2019-10-24
- Correct how we remove columns to use proper data.table notation.
@@ -309,6 +319,7 @@
diff --git a/docs/reference/ex_data_table.html b/docs/reference/ex_data_table.html
index fcbff45..d17b3fd 100644
--- a/docs/reference/ex_data_table.html
+++ b/docs/reference/ex_data_table.html
@@ -69,7 +69,7 @@
@@ -201,8 +201,8 @@ Examp
#> "x",
#> "y",
#> "z")) %.>%
-#> select_columns(., c(
-#> "x", "y")) %.>%
+#> select_columns(.,
+#> c('x', 'y')) %.>%
#> select_rows(.,
#> x < 2 & y < 30) ex_data_table(optree)#> x y
#> 1: 1 20
diff --git a/docs/reference/ex_data_table.relop_drop_columns.html b/docs/reference/ex_data_table.relop_drop_columns.html
index 60836ef..c78501b 100644
--- a/docs/reference/ex_data_table.relop_drop_columns.html
+++ b/docs/reference/ex_data_table.relop_drop_columns.html
@@ -69,7 +69,7 @@
@@ -174,8 +174,8 @@
Examp
dL <- data.frame(x = 1, y = 2, z = 3)
rquery_pipeline <- local_td(dL) %.>%
drop_columns(., "y")
-ex_data_table(rquery_pipeline)#> x y z
-#> 1: 1 2 3
+ex_data_table(rquery_pipeline)#> x z
+#> 1: 1 3
@@ -91,7 +91,7 @@
diff --git a/docs/reference/ex_data_table.relop_natural_join.html b/docs/reference/ex_data_table.relop_natural_join.html
index 203da12..69b8f05 100644
--- a/docs/reference/ex_data_table.relop_natural_join.html
+++ b/docs/reference/ex_data_table.relop_natural_join.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_non_sql.html b/docs/reference/ex_data_table.relop_non_sql.html
index 405cb81..3c9d525 100644
--- a/docs/reference/ex_data_table.relop_non_sql.html
+++ b/docs/reference/ex_data_table.relop_non_sql.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_null_replace.html b/docs/reference/ex_data_table.relop_null_replace.html
index f822a24..6d2b408 100644
--- a/docs/reference/ex_data_table.relop_null_replace.html
+++ b/docs/reference/ex_data_table.relop_null_replace.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_order_expr.html b/docs/reference/ex_data_table.relop_order_expr.html
index 7503875..fdfb160 100644
--- a/docs/reference/ex_data_table.relop_order_expr.html
+++ b/docs/reference/ex_data_table.relop_order_expr.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_orderby.html b/docs/reference/ex_data_table.relop_orderby.html
index 3c8ab48..ef0de0e 100644
--- a/docs/reference/ex_data_table.relop_orderby.html
+++ b/docs/reference/ex_data_table.relop_orderby.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_project.html b/docs/reference/ex_data_table.relop_project.html
index c0291a3..ba52480 100644
--- a/docs/reference/ex_data_table.relop_project.html
+++ b/docs/reference/ex_data_table.relop_project.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_rename_columns.html b/docs/reference/ex_data_table.relop_rename_columns.html
index 8664eeb..fbc9c45 100644
--- a/docs/reference/ex_data_table.relop_rename_columns.html
+++ b/docs/reference/ex_data_table.relop_rename_columns.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_select_columns.html b/docs/reference/ex_data_table.relop_select_columns.html
index e0e36b3..132befb 100644
--- a/docs/reference/ex_data_table.relop_select_columns.html
+++ b/docs/reference/ex_data_table.relop_select_columns.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_select_rows.html b/docs/reference/ex_data_table.relop_select_rows.html
index 62d30e6..7562e96 100644
--- a/docs/reference/ex_data_table.relop_select_rows.html
+++ b/docs/reference/ex_data_table.relop_select_rows.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_set_indicator.html b/docs/reference/ex_data_table.relop_set_indicator.html
index 7e97617..25ca4c2 100644
--- a/docs/reference/ex_data_table.relop_set_indicator.html
+++ b/docs/reference/ex_data_table.relop_set_indicator.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_sql.html b/docs/reference/ex_data_table.relop_sql.html
index 36ef18b..ba34fb4 100644
--- a/docs/reference/ex_data_table.relop_sql.html
+++ b/docs/reference/ex_data_table.relop_sql.html
@@ -71,7 +71,7 @@
diff --git a/docs/reference/ex_data_table.relop_table_source.html b/docs/reference/ex_data_table.relop_table_source.html
index aa94dcd..2e28636 100644
--- a/docs/reference/ex_data_table.relop_table_source.html
+++ b/docs/reference/ex_data_table.relop_table_source.html
@@ -72,7 +72,7 @@
diff --git a/docs/reference/ex_data_table.relop_theta_join.html b/docs/reference/ex_data_table.relop_theta_join.html
index 965c928..614aea0 100644
--- a/docs/reference/ex_data_table.relop_theta_join.html
+++ b/docs/reference/ex_data_table.relop_theta_join.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table.relop_unionall.html b/docs/reference/ex_data_table.relop_unionall.html
index e64d579..545a58c 100644
--- a/docs/reference/ex_data_table.relop_unionall.html
+++ b/docs/reference/ex_data_table.relop_unionall.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/ex_data_table_parallel.html b/docs/reference/ex_data_table_parallel.html
index 451a1cc..c94dfd8 100644
--- a/docs/reference/ex_data_table_parallel.html
+++ b/docs/reference/ex_data_table_parallel.html
@@ -72,7 +72,7 @@
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 70e2af3..49468ab 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -67,7 +67,7 @@
diff --git a/docs/reference/layout_to_blocks_data_table.html b/docs/reference/layout_to_blocks_data_table.html
index ce0c582..a4cf343 100644
--- a/docs/reference/layout_to_blocks_data_table.html
+++ b/docs/reference/layout_to_blocks_data_table.html
@@ -71,7 +71,7 @@
diff --git a/docs/reference/layout_to_rowrecs_data_table.html b/docs/reference/layout_to_rowrecs_data_table.html
index 239a3d5..1fc91ca 100644
--- a/docs/reference/layout_to_rowrecs_data_table.html
+++ b/docs/reference/layout_to_rowrecs_data_table.html
@@ -71,7 +71,7 @@
diff --git a/docs/reference/make_dt_lookup_by_column.html b/docs/reference/make_dt_lookup_by_column.html
index 2ad8103..2e37941 100644
--- a/docs/reference/make_dt_lookup_by_column.html
+++ b/docs/reference/make_dt_lookup_by_column.html
@@ -70,7 +70,7 @@
diff --git a/docs/reference/rbindlist_data_table.html b/docs/reference/rbindlist_data_table.html
index 7049070..0155c35 100644
--- a/docs/reference/rbindlist_data_table.html
+++ b/docs/reference/rbindlist_data_table.html
@@ -69,7 +69,7 @@
diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html
index 84b83b6..958d661 100644
--- a/docs/reference/reexports.html
+++ b/docs/reference/reexports.html
@@ -78,7 +78,7 @@
diff --git a/docs/reference/rq_df_funciton_node.html b/docs/reference/rq_df_funciton_node.html
index 3af3aa9..e966b26 100644
--- a/docs/reference/rq_df_funciton_node.html
+++ b/docs/reference/rq_df_funciton_node.html
@@ -69,7 +69,7 @@
@@ -221,17 +221,17 @@