Skip to content

Commit

Permalink
Implement parsing of "Exact dates" as described at https://wiki.opens…
Browse files Browse the repository at this point in the history
  • Loading branch information
lehmann-4178656ch committed Jan 26, 2024
1 parent 9816435 commit f644dd1
Show file tree
Hide file tree
Showing 4 changed files with 766 additions and 25 deletions.
23 changes: 23 additions & 0 deletions include/osm2rdf/osm/FactHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@

namespace osm2rdf::osm {

/// Classifies a date/time value by how much of it is present.
///
/// The enumerator names suggest increasing precision: year only, year and
/// month, full calendar date, and a date with a time component.
/// NOTE(review): the enum is not referenced in the visible part of this
/// change -- confirm its intended use against the callers.
///
/// Enumerators are numbered consecutively from 0, so the numeric values are
/// invalid = 0, date_yyyy = 1, date_yyyy_mm = 2, date_yyyy_mm_dd = 3 and
/// date_time = 4.
enum DateTimeType {
  invalid = 0,      ///< 0: value is not a usable date
  date_yyyy,        ///< 1
  date_yyyy_mm,     ///< 2
  date_yyyy_mm_dd,  ///< 3
  date_time         ///< 4
};

template <typename W>
class FactHandler {
public:
Expand Down Expand Up @@ -75,6 +83,21 @@ class FactHandler {
FRIEND_TEST(OSM_FactHandler, writeTagListWikipediaWithLang);
FRIEND_TEST(OSM_FactHandler, writeTagListWikipediaWithoutLang);
FRIEND_TEST(OSM_FactHandler, writeTagListSkipWikiLinks);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateInvalid);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateInvalid2);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateInvalid3);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYear1);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYear2);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYear3);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYear4);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonth1);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonth2);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonth3);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonth4);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonthDay1);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonthDay2);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonthDay3);
FRIEND_TEST(OSM_FactHandler, writeTagListStartDateYearMonthDay4);

// NOTE(review): only the declaration is visible here. Judging by the name and
// signature this presumably writes a triple with subject `s`, predicate `p`
// and the time `t` rendered as an ISO 8601 timestamp -- confirm against the
// definition in FactHandler.cpp.
void writeSecondsAsISO(const std::string& s, const std::string& p,
const std::time_t& t);
Expand Down
76 changes: 52 additions & 24 deletions src/osm/FactHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,31 +492,59 @@ void osm2rdf::osm::FactHandler<W>::writeTagList(
}
}
if (key == "start_date" || key == "end_date") {
const auto dashCount = std::count(value.begin(), value.end(), '-');
switch (dashCount) {
case 2:
_writer->writeTriple(
subj,
_writer->generateIRIUnsafe(
DATASET_NAMESPACE[_config.sourceDataset], key),
_writer->generateLiteralUnsafe(value, "^^" + IRI__XSD_DATE));
break;
case 1:
_writer->writeTriple(
subj,
_writer->generateIRIUnsafe(
DATASET_NAMESPACE[_config.sourceDataset], key),
_writer->generateLiteralUnsafe(value,
"^^" + IRI__XSD_YEAR_MONTH));
break;
case 0:
_writer->writeTriple(
subj,
_writer->generateIRIUnsafe(
DATASET_NAMESPACE[_config.sourceDataset], key),
_writer->generateLiteralUnsafe(value, "^^" + IRI__XSD_YEAR));
break;
// Normalise an "exact date" tag value of the shape [-]YYYY[-MM[-DD]] into a
// zero-padded literal typed as xsd:gYear, xsd:gYearMonth or xsd:date.
// Values that do not fit this shape are skipped via `continue`.

// Skip if empty
if (value.empty()) {
  continue;
}
// Abort if non digit and not '-'. The cast is required: passing a negative
// char (any non-ASCII byte) to isdigit is undefined behaviour.
if (std::any_of(value.cbegin(), value.cend(), [](char c) {
      return isdigit(static_cast<unsigned char>(c)) == 0 && c != '-';
    })) {
  continue;
}

// A leading '-' marks a negative year; it is a sign, not a field separator.
const bool negativeYear = value.front() == '-';

// Reject values with more than three fields before doing any further work.
const size_t minusCount =
    static_cast<size_t>(std::count(value.begin(), value.end(), '-'));
const size_t partCount = minusCount + 1 - (negativeYear ? 1 : 0);
if (partCount > 3) {
  // Invalid length
  continue;
}

// Split everything after the optional sign at each '-'.
std::vector<std::string> parts;
parts.reserve(partCount);
for (size_t last = negativeYear ? 1 : 0;;) {
  const size_t next = value.find('-', last);
  if (next == std::string::npos) {
    parts.emplace_back(value.substr(last));
    break;
  }
  parts.emplace_back(value.substr(last, next - last));
  last = next + 1;
}

std::string newValue;
newValue.reserve(value.size() + 4);
if (negativeYear) {
  newValue += '-';
}
bool validDate = true;
for (size_t i = 0; i < parts.size(); ++i) {
  // Year is padded to four digits, month and day to two.
  const size_t width = (i == 0) ? 4 : 2;
  const std::string& part = parts[i];
  // An empty field ("2024--01", "2024-", "-") is not a date.
  if (part.empty()) {
    validDate = false;
    break;
  }
  // Work on the digits without leading zeros so that "007" and "7" are
  // treated alike; an all-zero field leaves no digits and pads to zeros.
  const size_t firstNonZero = part.find_first_not_of('0');
  const std::string digits = (firstNonZero == std::string::npos)
                                 ? std::string()
                                 : part.substr(firstNonZero);
  // Reject fields that do not fit instead of silently truncating them
  // (e.g. year "12345" must not become "1234", month "123" not "12").
  if (digits.size() > width) {
    validDate = false;
    break;
  }
  std::string field(width - digits.size(), '0');
  field += digits;
  // Equal-length digit strings compare like numbers, so plain string
  // comparison is enough for the range checks. Month lengths (e.g. 02-30)
  // are not checked.
  if (i == 1 && (field < "01" || field > "12")) {
    validDate = false;
    break;
  }
  if (i == 2 && (field < "01" || field > "31")) {
    validDate = false;
    break;
  }
  if (i > 0) {
    newValue += '-';
  }
  newValue += field;
}
if (!validDate) {
  continue;
}

// One, two or three fields select gYear, gYearMonth or date respectively.
const std::string typeString[3] = {IRI__XSD_YEAR, IRI__XSD_YEAR_MONTH,
                                   IRI__XSD_DATE};
_writer->writeTriple(subj,
                     _writer->generateIRIUnsafe(
                         DATASET_NAMESPACE[_config.sourceDataset], key),
                     _writer->generateLiteralUnsafe(
                         newValue, "^^" + typeString[parts.size() - 1]));
}
}
_writer->writeTriple(
Expand Down
2 changes: 1 addition & 1 deletion src/ttl/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ osm2rdf::ttl::Writer<T>::Writer(const osm2rdf::config::Config& config,
osm2rdf::ttl::constants::IRI__XSD_YEAR =
generateIRI(osm2rdf::ttl::constants::NAMESPACE__XML_SCHEMA, "gYear");
osm2rdf::ttl::constants::IRI__XSD_YEAR_MONTH =
generateIRI(osm2rdf::ttl::constants::NAMESPACE__XML_SCHEMA, "gYearMont");
generateIRI(osm2rdf::ttl::constants::NAMESPACE__XML_SCHEMA, "gYearMonth");

osm2rdf::ttl::constants::LITERAL__NO = generateLiteral("no", "");
osm2rdf::ttl::constants::LITERAL__YES = generateLiteral("yes", "");
Expand Down
Loading

0 comments on commit f644dd1

Please sign in to comment.