Skip to content

Commit

Permalink
Fix NWS image timestamp extraction (#137)
Browse files Browse the repository at this point in the history
The pattern that encodes the timestamp for NWS images used to be irregular and
required careful parsing. This pattern was changed some time before November
2020 and became regular. The irregular parser produced invalid timestamps when
applied to filenames with the new pattern. This commit updates the function
that extracts a timestamp from the filename to expect the regular pattern.

This commit also removes the timestamp prefix from the filename if it could be
extracted, allowing users to specify their own timestamp pattern.

This means that a handler with the following definition:
```
filename = "{time:%Y%m%dT%H%M%SZ}_{filename}"
```

no longer produces filenames such as:
```
20220327T110046Z_20220327110046-hiwind_pac_latest.gif
```

but rather produces:
```
20220327T110046Z_hiwind_pac_latest.gif
```

Fixes #100.
  • Loading branch information
pietern authored Mar 27, 2022
1 parent d61405e commit 9f21e52
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 65 deletions.
2 changes: 1 addition & 1 deletion src/goesproc/filename.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ struct FilenameBuilder {
std::string dir;
std::string filename;

struct timespec time;
struct timespec time{0, 0};
AWIPS awips;
Product product;
Region region;
Expand Down
86 changes: 22 additions & 64 deletions src/goesproc/handler_nws_image.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,68 +6,27 @@

namespace {

void parseIrregularTime(const std::string& text, struct timespec& time) {
// Unlike the NWS text files, the NWS image files on GOES-R
// don't use a consistent pattern for time in their name.
//
// Example file names:
// - 201801010001834-pacsfc48_latestBW.gif
// - 201803640503770-pacsfc72_latestBW.gif
// - 201803640503019-USA_latest.gif
// - 2018041050104193-pac24_latestBW.gif
//
// As you can see, the month is not followed by the day of the
// month, but by the day of the year. The number of seconds and
// sub-seconds may lack a leading 0. Because of these ambiguities,
// we only extract the year, month, day, hour, and minute.
//
std::string parseTime(const std::string& text, struct timespec& time) {
// This field used an irregular pattern before November 2020.
// See https://github.com/pietern/goestools/issues/100 for historical context.
const char* buf = text.c_str();
const char* format = "%Y%m";
const char* format = "%Y%m%d%H%M%S";
struct tm tm;
memset(&tm, 0, sizeof(tm));
auto ptr = strptime(buf, format, &tm);
const auto ptr = strptime(buf, format, &tm);

// Only use time if strptime was successful
if (ptr != (buf + 6)) {
return;
}

// Number of characters used for day of year
const auto month = tm.tm_mon + 1;
auto mlen = 2;
buf = ptr;

// April contains both 2 and 3 digits day of year.
// If it starts with a '1' it must be 3 digits.
if ((month == 4 && buf[0] == '1') || month > 4) {
mlen = 3;
}

// Interpret variable length day of year.
char tmp[4];
int yday;
memcpy(tmp, buf, mlen);
tmp[mlen] = 0;
auto rv = sscanf(tmp, "%d", &yday);
if (rv != 1 || yday >= 367) {
return;
}

buf += mlen;
format = "%H%M";
ptr = strptime(buf, format, &tm);

// Only use time if strptime was successful
if (ptr != (buf + 4)) {
return;
// Only use time if strptime was successful.
// Format with zero padding is always 14 characters.
// The character after the time must be '-'.
if (ptr != (buf + 14) || ptr[0] != '-') {
return text;
}

// Set day to January 1 before mktime, so we can use simple
// arithmetic to get to the real day of the year.
tm.tm_mon = 0;
tm.tm_mday = 1;
time.tv_sec = mktime(&tm) + (60 * 60 * 24 * (yday - 1));
time.tv_sec = mktime(&tm);
time.tv_nsec = 0;

// Return everything after the separator.
return std::string(&ptr[1]);
}

} // namespace
Expand All @@ -91,21 +50,20 @@ void NWSImageHandler::handle(std::shared_ptr<const lrit::File> f) {
return;
}

FilenameBuilder fb;
fb.dir = config_.dir;
fb.filename = getBasename(*f);

// In the GOES-15 LRIT stream these image files have a time stamp
// header; in the GOES-R HRIT stream they don't.
struct timespec time = {0, 0};
if (f->hasHeader<lrit::TimeStampHeader>()) {
time = f->getHeader<lrit::TimeStampHeader>().getUnix();
fb.time = f->getHeader<lrit::TimeStampHeader>().getUnix();
} else {
auto text = f->getHeader<lrit::AnnotationHeader>().text;
parseIrregularTime(text, time);
// If time can successfully be extracted from the filename
// then remove it from the filename passed to the builder.
fb.filename = parseTime(fb.filename, fb.time);
}

FilenameBuilder fb;
fb.dir = config_.dir;
fb.filename = getBasename(*f);
fb.time = time;

// If this is a GIF we can write it directly
if (nlh.noaaSpecificCompression == 5) {
auto path = fb.build(config_.filename, "gif");
Expand Down

0 comments on commit 9f21e52

Please sign in to comment.