diff --git a/DESCRIPTION b/DESCRIPTION index 470df7a..9146697 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: SeqArray Type: Package Title: Big Data Management of Whole-genome Sequence Variant Calls -Version: 1.15.6 -Date: 2017-04-10 +Version: 1.17.1 +Date: 2017-04-27 Depends: R (>= 3.3.0), gdsfmt (>= 1.10.1) Imports: methods, parallel, S4Vectors, IRanges, GenomicRanges, GenomeInfoDb, SummarizedExperiment, Biostrings, VariantAnnotation diff --git a/NEWS b/NEWS index f8fe122..cc0c20f 100755 --- a/NEWS +++ b/NEWS @@ -1,5 +1,12 @@ +CHANGES IN VERSION 1.17.0-1.17.1 + + o progress information: showing overall running time when completed + + CHANGES IN VERSION 1.16.0 +------------------------- + o a new argument 'intersect' in `seqSetFilter()` and `seqSetFilterChrom()` o a new function `seqSetFilterCond()` diff --git a/src/Index.cpp b/src/Index.cpp index 8811d60..409ec1f 100755 --- a/src/Index.cpp +++ b/src/Index.cpp @@ -752,6 +752,27 @@ static const double S_HOUR = 60 * S_MIN; static const double S_DAY = 24 * S_HOUR; static const double S_YEAR = 365 * S_DAY; +static const char *time_str(double s) +{ + if (R_FINITE(s)) + { + static char buffer[64]; + if (s < S_MIN) + sprintf(buffer, "%.0fs", s); + else if (s < S_HOUR) + sprintf(buffer, "%.1fm", s/S_MIN); + else if (s < S_DAY) + sprintf(buffer, "%.1fh", s/S_HOUR); + else if (s < S_YEAR) + sprintf(buffer, "%.1fd", s/S_DAY); + else + sprintf(buffer, "%.1f years", s/S_YEAR); + return buffer; + } else + return "---"; +} + + CProgress::CProgress(C_Int64 start, C_Int64 count, SEXP conn, bool newline) { TotalCount = count; @@ -778,6 +799,7 @@ CProgress::CProgress(C_Int64 start, C_Int64 count, SEXP conn, bool newline) } time_t s; time(&s); + _start_time = s; _timer.reserve(128); _timer.push_back(pair(percent, s)); @@ -837,37 +859,9 @@ void CProgress::ShowProgress() // show if (NewLine) { - if (R_FINITE(s)) - { - if (s < S_MIN) - ConnPutText(File, "[%s] %2.0f%%, ETC: %.0fs\n", bar, p, s); - else if (s < S_HOUR) - ConnPutText(File, "[%s] %2.0f%%, ETC: %.1fm\n", bar, p, s/S_MIN); - else if (s < S_DAY) - ConnPutText(File, "[%s] %2.0f%%, ETC: %.1fh\n", bar, p, s/S_HOUR); - else if (s < S_YEAR) - ConnPutText(File, "[%s] %2.0f%%, ETC: %.1fd\n", bar, p, s/S_DAY); - else - ConnPutText(File, "[%s] %2.0f%%, ETC: %.1f years\n", bar, p, s/S_YEAR); - } else { - ConnPutText(File, "[%s] %2.0f%%, ETC: ---\n", bar, p); - } + ConnPutText(File, "[%s] %2.0f%%, ETC: %s\n", bar, p, time_str(s)); } else { - if (R_FINITE(s)) - { - if (s < S_MIN) - ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.0fs ", bar, p, s); - else if (s < S_HOUR) - ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1fm ", bar, p, s/S_MIN); - else if (s < S_DAY) - ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1fh ", bar, p, s/S_HOUR); - else if (s < S_YEAR) - ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1fd ", bar, p, s/S_DAY); - else - ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1f years", bar, p, s/S_YEAR); - } else { - ConnPutText(File, "\r[%s] %2.0f%%, ETC: --- ", bar, p); - } + ConnPutText(File, "\r[%s] %2.0f%%, ETC: %s ", bar, p, time_str(s)); if (Counter >= TotalCount) ConnPutText(File, "\n"); } } else { @@ -931,27 +925,15 @@ void CProgressStdOut::ShowProgress() p *= 100; // show + _last_time = now; if (Counter >= TotalCount) { - Rprintf("\r[%s] 100%%, completed \n", bar); + s = difftime(_last_time, _start_time); + Rprintf("\r[%s] 100%%, completed in %s\n", bar, time_str(s)); } else if ((interval >= 5) || (Counter <= 0)) { _last_time = now; - if (R_FINITE(s)) - { - if (s < S_MIN) - Rprintf("\r[%s] %2.0f%%, ETC: %.0fs ", bar, p, s); - else if (s < S_HOUR) - Rprintf("\r[%s] %2.0f%%, ETC: %.1fm ", bar, p, s/S_MIN); - else if (s < S_DAY) - Rprintf("\r[%s] %2.0f%%, ETC: %.1fh ", bar, p, s/S_HOUR); - else if (s < S_YEAR) - Rprintf("\r[%s] %2.0f%%, ETC: %.1fd ", bar, p, s/S_DAY); - else - Rprintf("\r[%s] %2.0f%%, ETC: %.1f years ", bar, p, s/S_YEAR); - } else { - Rprintf("\r[%s] %2.0f%%, ETC: --- ", bar, p); - } + Rprintf("\r[%s] %2.0f%%, ETC: %s ", bar, p, time_str(s)); } } } diff --git a/src/Index.h b/src/Index.h index db1080c..488127a 100755 --- a/src/Index.h +++ b/src/Index.h @@ -478,8 +478,9 @@ class COREARRAY_DLL_LOCAL CProgress protected: C_Int64 TotalCount; ///< the total number - C_Int64 Counter; ///< the current counter - Rconnection File; ///< R connection + C_Int64 Counter; ///< the current counter + Rconnection File; ///< R connection + time_t _start_time; ///< the starting time bool NewLine; double _start, _step; C_Int64 _hit; diff --git a/vignettes/R_Integration.Rmd b/vignettes/R_Integration.Rmd index 15da0bf..7a68833 100755 --- a/vignettes/R_Integration.Rmd +++ b/vignettes/R_Integration.Rmd @@ -136,7 +136,7 @@ Principal Component Analysis (PCA) is a common tool used in exploratory data ana # covariance variable with an initial value s <- 0 -seqApply(f, "$dosage", function(x) +seqApply(file, "$dosage", function(x) { p <- 0.5 * mean(x, na.rm=TRUE) # allele frequency g <- (x - 2*p) / sqrt(p*(1-p)) # normalized by allele frequency @@ -153,9 +153,9 @@ eig <- eigen(s) ``` [..................................................] 0%, ETC: --- -[=>................................................] 2%, ETC: 5.4m +[==================>...............................] 36%, ETC: 4.1m ... -[==================================================] 100%, completed +[==================================================] 100%, completed in 14.3m ``` ```R @@ -180,7 +180,7 @@ eig <- eigen(s) [..................................................] 0%, ETC: --- [==================>...............................] 35%, ETC: 9s [======================================>...........] 75%, ETC: 3s -[==================================================] 100%, completed +[==================================================] 100%, completed in 14s ``` `seqParallel()` utilizes the facilities offered by the R parallel package to perform calculations within a cluster or SMP environment, and the genotypes are automatically split into non-overlapping parts. The parallel implementation with R is shown as follows, and the C optimized function is also available in the SNPRelate package. @@ -282,7 +282,7 @@ seqGDS2VCF(file, "exons.vcf.gz") ## FORMAT Field: ## [..................................................] 0%, ETC: --- -[==================================================] 100%, completed +[==================================================] 100%, completed in 0s ## Tue Jan 3 15:43:55 2017 Done. ``` If random-access memory is sufficiently large, users could load all exon variants via `seqGetData(file, "genotype")`; otherwise, data have to be loaded by chunk or a user-defined function is applied over variants by `seqApply()`.