Skip to content

Commit

Permalink
[search] Search in downloader by country names.
Browse files Browse the repository at this point in the history
When a request to search in downloader arrives, we used to
only find features on the world map that match the request
and return the mwms that contain these features.

This commit mixes in the results of search directly in
the country tree (countries.txt), or, to be more precise, by
the translations of the names of the countries there (countries_names.txt).

This is not the most efficient implementation, but hopefully
it is isolated enough to make improvements easy. It was also
useful as an exploration of where our current search APIs are lacking, for example:

* The unnecessary std::string<->UniString conversions.
* Indexes such as MemSearchIndex pretending to be generic while in fact being
  tailored to a particular use-case.
* The difficulty of mixing search results from different sources.
  • Loading branch information
mpimenov committed Mar 12, 2021
1 parent a644112 commit 9823764
Show file tree
Hide file tree
Showing 18 changed files with 314 additions and 23 deletions.
1 change: 1 addition & 0 deletions android/assets/countries_names.txt
1 change: 1 addition & 0 deletions android/script/replace_links.bat
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ cp ../data/classificator.txt assets/
cp ../data/colors.txt assets/
cp ../data/copyright.html assets/
cp ../data/countries.txt assets/
cp ../data/countries_names.txt assets/
cp ../data/drules_proto_dark.bin assets/
cp ../data/drules_proto_clear.bin assets/
cp ../data/drules_proto_vehicle_dark.bin assets/
Expand Down
1 change: 1 addition & 0 deletions defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@

#define COUNTRIES_FILE "countries.txt"
#define COUNTRIES_META_FILE "countries_meta.txt"
#define COUNTRIES_NAMES_FILE "countries_names.txt"
#define LEAP_SPEEDS_FILE "leap_speeds.json"

#define WORLD_FILE_NAME "World"
Expand Down
4 changes: 4 additions & 0 deletions iphone/Maps/Maps.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@
34F73FA31E08300E00AC1FD6 /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 34F73FA11E08300E00AC1FD6 /* Images.xcassets */; };
34F742321E0834F400AC1FD6 /* UIViewController+Navigation.m in Sources */ = {isa = PBXBuildFile; fileRef = 34F742301E0834F400AC1FD6 /* UIViewController+Navigation.m */; };
34FE5A6F1F18F30F00BCA729 /* TrafficButtonArea.swift in Sources */ = {isa = PBXBuildFile; fileRef = 34FE5A6D1F18F30F00BCA729 /* TrafficButtonArea.swift */; };
3970A6A825B64EE400CF5828 /* countries_names.txt in Resources */ = {isa = PBXBuildFile; fileRef = 3970A6A725B64EE300CF5828 /* countries_names.txt */; };
39CDE69123E1B6C8007CDA58 /* libge0.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 39CDE69023E1B6C8007CDA58 /* libge0.a */; };
3D0D2F7623D858BF00945C8D /* IsolinesTutorialBlur.xib in Resources */ = {isa = PBXBuildFile; fileRef = 3D0D2F7523D858BF00945C8D /* IsolinesTutorialBlur.xib */; };
3D15ACEE2155117000F725D5 /* MWMObjectsCategorySelectorDataSource.mm in Sources */ = {isa = PBXBuildFile; fileRef = 3D15ACED2155117000F725D5 /* MWMObjectsCategorySelectorDataSource.mm */; };
Expand Down Expand Up @@ -1416,6 +1417,7 @@
34FE4C431BCC013500066718 /* MWMMapWidgets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MWMMapWidgets.h; sourceTree = "<group>"; };
34FE4C441BCC013500066718 /* MWMMapWidgets.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MWMMapWidgets.mm; sourceTree = "<group>"; };
34FE5A6D1F18F30F00BCA729 /* TrafficButtonArea.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrafficButtonArea.swift; sourceTree = "<group>"; };
3970A6A725B64EE300CF5828 /* countries_names.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = countries_names.txt; path = ../../data/countries_names.txt; sourceTree = "<group>"; };
39CDE69023E1B6C8007CDA58 /* libge0.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libge0.a; sourceTree = BUILT_PRODUCTS_DIR; };
3D0D2F7523D858BF00945C8D /* IsolinesTutorialBlur.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = IsolinesTutorialBlur.xib; sourceTree = "<group>"; };
3D15ACED2155117000F725D5 /* MWMObjectsCategorySelectorDataSource.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MWMObjectsCategorySelectorDataSource.mm; sourceTree = "<group>"; };
Expand Down Expand Up @@ -2360,6 +2362,7 @@
29B97314FDCFA39411CA2CEA /* Maps */ = {
isa = PBXGroup;
children = (
3970A6A725B64EE300CF5828 /* countries_names.txt */,
47AEF83F2231249E00D20538 /* categories_brands.txt */,
471BBD92213038E000EB17C9 /* TipsAndTricks */,
FA36B8011540388B004560CC /* Bookmarks */,
Expand Down Expand Up @@ -5187,6 +5190,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
3970A6A825B64EE400CF5828 /* countries_names.txt in Resources */,
47AEF8402231249E00D20538 /* categories_brands.txt in Resources */,
F6C3A1B221AC22810060EEC8 /* Alert 5.m4a in Resources */,
4560F585213D53C100CC736C /* shaders_metal.metallib in Resources */,
Expand Down
4 changes: 4 additions & 0 deletions map/framework.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1707,6 +1707,10 @@ void Framework::SelectSearchResult(search::Result const & result, bool animation
m_currentPlacePageInfo = {};
ASSERT(false, ("Suggests should not be here."));
return;
case Result::Type::DownloaderEntry:
m_currentPlacePageInfo = {};
ASSERT(false, ("Downloader entries should not be here."));
return;
}

m_currentPlacePageInfo = BuildPlacePageInfo(info);
Expand Down
8 changes: 6 additions & 2 deletions map/search_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,28 @@
#include "map/discovery/discovery_search_params.hpp"
#include "map/everywhere_search_params.hpp"

#include "partners_api/booking_api.hpp"

#include "search/bookmarks/processor.hpp"
#include "search/geometry_utils.hpp"
#include "search/hotels_filter.hpp"
#include "search/tracer.hpp"
#include "search/utils.hpp"

#include "partners_api/booking_api.hpp"

#include "storage/downloader_search_params.hpp"

#include "platform/platform.hpp"
#include "platform/preferred_languages.hpp"
#include "platform/safe_callback.hpp"

#include "geometry/mercator.hpp"

#include "base/checked_cast.hpp"
#include "base/file_name_utils.hpp"
#include "base/string_utils.hpp"

#include "defines.hpp"

#include <algorithm>
#include <cmath>
#include <iterator>
Expand Down
2 changes: 2 additions & 0 deletions search/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ set(
city_finder.cpp
city_finder.hpp
common.hpp
countries_names_index.cpp
countries_names_index.hpp
cuisine_filter.cpp
cuisine_filter.hpp
displayed_categories.cpp
Expand Down
89 changes: 89 additions & 0 deletions search/countries_names_index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "search/countries_names_index.hpp"

#include "platform/platform.hpp"

#include "coding/file_reader.hpp"

#include "base/assert.hpp"

#include <fstream>
#include <set>
#include <sstream>

using namespace std;

namespace search
{
// Builds the index eagerly: first loads the countries and their translated
// names from COUNTRIES_NAMES_FILE into |m_countries|, then adds every
// country's translations to |m_index|.
// The order matters: BuildIndexFromTranslations() iterates over |m_countries|.
CountriesNamesIndex::CountriesNamesIndex()
{
ReadCountryNamesFromFile(m_countries);
BuildIndexFromTranslations();
}

// Replaces the contents of |results| with the ids of all countries that have
// a translation matched by at least one token of |query|.
//
// Every token is matched with a Levenshtein DFA. The last token is
// additionally treated as a prefix when |query| does not end with a delimiter.
// Each country is reported at most once; no ranking is performed.
void CountriesNamesIndex::CollectMatchingCountries(string const & query,
                                                   vector<storage::CountryId> & results)
{
  vector<strings::UniString> queryTokens;
  search::NormalizeAndTokenizeString(query, queryTokens);

  // A query that ends with a non-delimiter character may still be in the
  // middle of being typed, so its last token is matched as a prefix.
  search::Delimiters isDelimiter;
  bool const lastTokenIsPrefix = !query.empty() && !isDelimiter(strings::LastUniChar(query));

  // Positions in |m_countries| of the matched countries, without duplicates.
  set<size_t> matchedIds;
  auto collectId = [&matchedIds](size_t id, bool /* exactMatch */) { matchedIds.insert(id); };

  size_t const numTokens = queryTokens.size();
  for (size_t tokenIdx = 0; tokenIdx < numTokens; ++tokenIdx)
  {
    bool const matchAsPrefix = lastTokenIsPrefix && tokenIdx + 1 == numTokens;
    if (matchAsPrefix)
      Retrieve<strings::PrefixDFAModifier<strings::LevenshteinDFA>>(queryTokens[tokenIdx], collectId);
    else
      Retrieve<strings::LevenshteinDFA>(queryTokens[tokenIdx], collectId);
  }

  // todo(@m) Do not bother with tf/idf for now.
  results.clear();
  for (auto const countryIdx : matchedIds)
  {
    CHECK_LESS(countryIdx, m_countries.size(), ());
    results.emplace_back(m_countries[countryIdx].m_countryId);
  }
}

// Parses COUNTRIES_NAMES_FILE into |countries|, replacing its previous contents.
//
// The file is processed line by line (every line is trimmed first):
// * "[CountryId]" starts a new country with the given id;
// * "key=value" adds the trimmed |value| as a translation of the most recently
//   started country; the key (a language code) is ignored;
// * blank lines and lines without '=' are skipped.
// A translation that appears before the first "[CountryId]" line is dropped.
// A line that starts with '[' but does not end with ']' fails a CHECK.
void CountriesNamesIndex::ReadCountryNamesFromFile(vector<Country> & countries)
{
  string contents;
  GetPlatform().GetReader(COUNTRIES_NAMES_FILE)->ReadAsString(contents);
  istringstream iss(contents);

  countries.clear();
  string line;
  while (getline(iss, line))
  {
    // Trim before testing for emptiness so that whitespace-only lines are
    // skipped right here instead of reaching the checks below as an empty string.
    strings::Trim(line);
    if (line.empty())
      continue;

    if (line.front() == '[')
    {
      CHECK_EQUAL(line.back(), ']', ());
      countries.push_back({});
      // Strip the surrounding brackets.
      countries.back().m_countryId = line.substr(1, line.size() - 2);
      continue;
    }

    auto const pos = line.find('=');
    if (pos == string::npos)
      continue;

    // Ignore the language code: the language sets differ for StringUtf8Multilang
    // and for the translations used by this class.
    auto translation = line.substr(pos + 1);
    strings::Trim(translation);
    if (!countries.empty())
      countries.back().m_doc.m_translations.push_back(translation);
  }
}

void CountriesNamesIndex::BuildIndexFromTranslations()
{
for (size_t i = 0; i < m_countries.size(); ++i)
m_index.Add(i, m_countries[i].m_doc);
}
} // namespace search
65 changes: 65 additions & 0 deletions search/countries_names_index.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#pragma once

#include "search/base/mem_search_index.hpp"
#include "search/feature_offset_match.hpp"

#include "storage/storage_defines.hpp"

#include "indexer/search_string_utils.hpp"

#include "base/string_utils.hpp"

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

namespace search
{
// An in-memory search index over the translated names of the countries
// listed in COUNTRIES_NAMES_FILE. Given a query, it returns the ids of the
// countries whose names match it.
class CountriesNamesIndex
{
public:
  // The indexed document of a single country: all of its translated names.
  struct Doc
  {
    // Calls |fn| once per translation with StringUtf8Multilang::kDefaultCode
    // and the normalized and simplified translation. A translation is passed
    // as a whole; it is not split into words here.
    template <typename Fn>
    void ForEachToken(Fn && fn) const
    {
      for (auto const & s : m_translations)
        fn(StringUtf8Multilang::kDefaultCode, NormalizeAndSimplifyString(s));
    }

    std::vector<std::string> m_translations;
  };

  // Reads the names file and builds the index.
  CountriesNamesIndex();

  // Replaces the contents of |results| with the ids of the countries
  // matched by |query|.
  void CollectMatchingCountries(std::string const & query,
                                std::vector<storage::CountryId> & results);

private:
  struct Country
  {
    storage::CountryId m_countryId;
    Doc m_doc;
  };

  // Matches the token |s| against the index using an automaton of type |DFA|
  // and forwards |fn| to MatchFeaturesInTrie as the callback for matches.
  // Nothing is filtered out: the filter accepts every id.
  //
  // todo(@m) Almost the same as in bookmarks/processor.hpp.
  template <typename DFA, typename Fn>
  void Retrieve(strings::UniString const & s, Fn && fn) const
  {
    SearchTrieRequest<DFA> request;
    request.m_names.emplace_back(BuildLevenshteinDFA(s));
    request.m_langs.insert(StringUtf8Multilang::kDefaultCode);

    MatchFeaturesInTrie(
        request, m_index.GetRootIterator(), [](size_t /* id */) { return true; } /* filter */,
        std::forward<Fn>(fn));
  }

  // Fills |countries| with the contents of the names file.
  void ReadCountryNamesFromFile(std::vector<Country> & countries);
  // Adds the translations of every country from |m_countries| to |m_index|.
  void BuildIndexFromTranslations();

  std::vector<Country> m_countries;
  // Indexes the countries by their positions in |m_countries|.
  search_base::MemSearchIndex<size_t> m_index;
};
} // namespace search
44 changes: 30 additions & 14 deletions search/downloader_search_callback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

#include "search/result.hpp"

#include "storage/country_info_getter.hpp"
#include "storage/storage.hpp"

#include "editor/editable_data_source.hpp"

#include "indexer/data_source.hpp"

#include "storage/country_info_getter.hpp"
#include "storage/storage.hpp"

#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"

#include <set>
#include <string>
#include <utility>

Expand Down Expand Up @@ -66,6 +66,18 @@ void DownloaderSearchCallback::operator()(search::Results const & results)

for (auto const & result : results)
{
if (result.GetResultType() == search::Result::Type::DownloaderEntry)
{
storage::DownloaderSearchResult downloaderResult(result.GetCountryId(),
result.GetString() /* m_matchedName */);
if (uniqueResults.find(downloaderResult) == uniqueResults.end())
{
uniqueResults.insert(downloaderResult);
downloaderSearchResults.m_results.push_back(downloaderResult);
}
continue;
}

if (!result.HasPoint())
continue;

Expand Down Expand Up @@ -98,21 +110,25 @@ void DownloaderSearchCallback::operator()(search::Results const & results)
}
}
}
auto const & mercator = result.GetFeatureCenter();
storage::CountryId const & countryId = m_infoGetter.GetRegionCountryId(mercator);
if (countryId == storage::kInvalidCountryId)
continue;

storage::DownloaderSearchResult downloaderResult(countryId,
result.GetString() /* m_matchedName */);
if (uniqueResults.find(downloaderResult) == uniqueResults.end())
if (result.GetResultType() == search::Result::Type::LatLon)
{
uniqueResults.insert(downloaderResult);
downloaderSearchResults.m_results.push_back(downloaderResult);
auto const & mercator = result.GetFeatureCenter();
storage::CountryId const & countryId = m_infoGetter.GetRegionCountryId(mercator);
if (countryId == storage::kInvalidCountryId)
continue;

storage::DownloaderSearchResult downloaderResult(countryId,
result.GetString() /* m_matchedName */);
if (uniqueResults.find(downloaderResult) == uniqueResults.end())
{
uniqueResults.insert(downloaderResult);
downloaderSearchResults.m_results.push_back(downloaderResult);
}
continue;
}
}

downloaderSearchResults.m_query = m_params.m_query;
downloaderSearchResults.m_endMarker = results.IsEndMarker();

if (m_params.m_onResults)
Expand Down
26 changes: 26 additions & 0 deletions search/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ void Processor::Search(SearchParams const & params)
SetQuery(params.m_query);
SetViewport(viewport);

if (params.m_mode == Mode::Downloader)
SearchInDownloaderByCountryName(params);

// Used to store the earliest available cancellation status:
// if the search has been cancelled, we need to pinpoint the reason
// for cancellation and a further call to CancellationStatus() may
Expand Down Expand Up @@ -835,6 +838,29 @@ void Processor::SearchBookmarks(bookmarks::GroupId const & groupId)
m_bookmarksProcessor.Finish(IsCancelled());
}

// Looks up the trimmed query of |params| in the index of translated country
// names and emits up to kMaxResultsFromCountriesTree of the matched countries
// as results, calling Emit() after every added result.
void Processor::SearchInDownloaderByCountryName(SearchParams const & params)
{
  // This index is heavy (several megabytes) but we expect that a small number of
  // user sessions involves a search in downloader.
  // Therefore, it is initialized lazily upon first request.
  if (!m_countriesNamesIndex)
    m_countriesNamesIndex = make_unique<CountriesNamesIndex>();

  auto query = params.m_query;
  strings::Trim(query);

  vector<storage::CountryId> matched;
  m_countriesNamesIndex->CollectMatchingCountries(query, matched);

  // Only the first few matches are reported.
  size_t const kMaxResultsFromCountriesTree = 5;
  size_t const numToEmit = matched.size() > kMaxResultsFromCountriesTree
                               ? kMaxResultsFromCountriesTree
                               : matched.size();

  for (size_t i = 0; i < numToEmit; ++i)
  {
    m_emitter.AddResultNoChecks(Result(matched[i], "" /* matchedName */, false));
    m_emitter.Emit();
  }
}

void Processor::InitParams(QueryParams & params) const
{
params.SetQuery(m_query);
Expand Down
Loading

0 comments on commit 9823764

Please sign in to comment.