Skip to content

Commit

Permalink
progress -- showing overall running time when completed
Browse files Browse the repository at this point in the history
  • Loading branch information
zhengxwen committed Apr 28, 2017
1 parent a22dd10 commit bd7dd09
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 55 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: SeqArray
Type: Package
Title: Big Data Management of Whole-genome Sequence Variant Calls
Version: 1.15.6
Date: 2017-04-10
Version: 1.17.1
Date: 2017-04-27
Depends: R (>= 3.3.0), gdsfmt (>= 1.10.1)
Imports: methods, parallel, S4Vectors, IRanges, GenomicRanges,
GenomeInfoDb, SummarizedExperiment, Biostrings, VariantAnnotation
Expand Down
7 changes: 7 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
CHANGES IN VERSION 1.17.0-1.17.1

o progress information: showing overall running time when completed


CHANGES IN VERSION 1.16.0

-------------------------

o a new argument 'intersect' in `seqSetFilter()` and `seqSetFilterChrom()`

o a new function `seqSetFilterCond()`
Expand Down
74 changes: 28 additions & 46 deletions src/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,27 @@ static const double S_HOUR = 60 * S_MIN;
static const double S_DAY = 24 * S_HOUR;
static const double S_YEAR = 365 * S_DAY;

static const char *time_str(double s)
{
if (R_FINITE(s))
{
static char buffer[64];
if (s < S_MIN)
sprintf(buffer, "%.0fs", s);
else if (s < S_HOUR)
sprintf(buffer, "%.1fm", s/S_MIN);
else if (s < S_DAY)
sprintf(buffer, "%.1fh", s/S_HOUR);
else if (s < S_YEAR)
sprintf(buffer, "%.1fd", s/S_DAY);
else
sprintf(buffer, "%.1f years", s/S_YEAR);
return buffer;
} else
return "---";
}


CProgress::CProgress(C_Int64 start, C_Int64 count, SEXP conn, bool newline)
{
TotalCount = count;
Expand All @@ -778,6 +799,7 @@ CProgress::CProgress(C_Int64 start, C_Int64 count, SEXP conn, bool newline)
}

time_t s; time(&s);
_start_time = s;
_timer.reserve(128);
_timer.push_back(pair<double, time_t>(percent, s));

Expand Down Expand Up @@ -837,37 +859,9 @@ void CProgress::ShowProgress()
// show
if (NewLine)
{
if (R_FINITE(s))
{
if (s < S_MIN)
ConnPutText(File, "[%s] %2.0f%%, ETC: %.0fs\n", bar, p, s);
else if (s < S_HOUR)
ConnPutText(File, "[%s] %2.0f%%, ETC: %.1fm\n", bar, p, s/S_MIN);
else if (s < S_DAY)
ConnPutText(File, "[%s] %2.0f%%, ETC: %.1fh\n", bar, p, s/S_HOUR);
else if (s < S_YEAR)
ConnPutText(File, "[%s] %2.0f%%, ETC: %.1fd\n", bar, p, s/S_DAY);
else
ConnPutText(File, "[%s] %2.0f%%, ETC: %.1f years\n", bar, p, s/S_YEAR);
} else {
ConnPutText(File, "[%s] %2.0f%%, ETC: ---\n", bar, p);
}
ConnPutText(File, "[%s] %2.0f%%, ETC: %s\n", bar, p, time_str(s));
} else {
if (R_FINITE(s))
{
if (s < S_MIN)
ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.0fs ", bar, p, s);
else if (s < S_HOUR)
ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1fm ", bar, p, s/S_MIN);
else if (s < S_DAY)
ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1fh ", bar, p, s/S_HOUR);
else if (s < S_YEAR)
ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1fd ", bar, p, s/S_DAY);
else
ConnPutText(File, "\r[%s] %2.0f%%, ETC: %.1f years", bar, p, s/S_YEAR);
} else {
ConnPutText(File, "\r[%s] %2.0f%%, ETC: --- ", bar, p);
}
ConnPutText(File, "\r[%s] %2.0f%%, ETC: %s ", bar, p, time_str(s));
if (Counter >= TotalCount) ConnPutText(File, "\n");
}
} else {
Expand Down Expand Up @@ -931,27 +925,15 @@ void CProgressStdOut::ShowProgress()
p *= 100;

// show
_last_time = now;
if (Counter >= TotalCount)
{
Rprintf("\r[%s] 100%%, completed \n", bar);
s = difftime(_last_time, _start_time);
Rprintf("\r[%s] 100%%, completed in %s\n", bar, time_str(s));
} else if ((interval >= 5) || (Counter <= 0))
{
_last_time = now;
if (R_FINITE(s))
{
if (s < S_MIN)
Rprintf("\r[%s] %2.0f%%, ETC: %.0fs ", bar, p, s);
else if (s < S_HOUR)
Rprintf("\r[%s] %2.0f%%, ETC: %.1fm ", bar, p, s/S_MIN);
else if (s < S_DAY)
Rprintf("\r[%s] %2.0f%%, ETC: %.1fh ", bar, p, s/S_HOUR);
else if (s < S_YEAR)
Rprintf("\r[%s] %2.0f%%, ETC: %.1fd ", bar, p, s/S_DAY);
else
Rprintf("\r[%s] %2.0f%%, ETC: %.1f years ", bar, p, s/S_YEAR);
} else {
Rprintf("\r[%s] %2.0f%%, ETC: --- ", bar, p);
}
Rprintf("\r[%s] %2.0f%%, ETC: %s ", bar, p, time_str(s));
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,9 @@ class COREARRAY_DLL_LOCAL CProgress

protected:
C_Int64 TotalCount; ///< the total number
C_Int64 Counter; ///< the current counter
Rconnection File; ///< R connection
C_Int64 Counter; ///< the current counter
Rconnection File; ///< R connection
time_t _start_time; ///< the starting time
bool NewLine;
double _start, _step;
C_Int64 _hit;
Expand Down
10 changes: 5 additions & 5 deletions vignettes/R_Integration.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Principal Component Analysis (PCA) is a common tool used in exploratory data ana
# covariance variable with an initial value
s <- 0

seqApply(f, "$dosage", function(x)
seqApply(file, "$dosage", function(x)
{
p <- 0.5 * mean(x, na.rm=TRUE) # allele frequency
g <- (x - 2*p) / sqrt(p*(1-p)) # normalized by allele frequency
Expand All @@ -153,9 +153,9 @@ eig <- eigen(s)

```
[..................................................] 0%, ETC: ---
[=>................................................] 2%, ETC: 5.4m
[==================>...............................] 36%, ETC: 4.1m
...
[==================================================] 100%, completed
[==================================================] 100%, completed in 14.3m
```

```R
Expand All @@ -180,7 +180,7 @@ eig <- eigen(s)
[..................................................] 0%, ETC: ---
[==================>...............................] 35%, ETC: 9s
[======================================>...........] 75%, ETC: 3s
[==================================================] 100%, completed
[==================================================] 100%, completed in 14s
```

`seqParallel()` utilizes the facilities offered by the R parallel package to perform calculations within a cluster or SMP environment, and the genotypes are automatically split into non-overlapping parts. The parallel implementation with R is shown as follows, and the C optimized function is also available in the SNPRelate package.
Expand Down Expand Up @@ -282,7 +282,7 @@ seqGDS2VCF(file, "exons.vcf.gz")
## FORMAT Field: <none>
##
[..................................................] 0%, ETC: ---
[==================================================] 100%, completed
[==================================================] 100%, completed in 0s
## Tue Jan 3 15:43:55 2017 Done.
```
If random-access memory is sufficiently large, users could load all exon variants via `seqGetData(file, "genotype")`; otherwise, data have to be loaded by chunk or a user-defined function is applied over variants by `seqApply()`.
Expand Down

0 comments on commit bd7dd09

Please sign in to comment.