Skip to content

Commit

Permalink
Merge branch 'branch-23.10' of github.com:rapidsai/raft into fea/devcontainers
Browse files Browse the repository at this point in the history
  • Loading branch information
trxcllnt committed Sep 25, 2023
2 parents 3d3161b + dfde3b4 commit 41bb5f5
Show file tree
Hide file tree
Showing 10 changed files with 3,951 additions and 601 deletions.
19 changes: 15 additions & 4 deletions cpp/include/raft/neighbors/cagra_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,10 @@ struct index : ann::index {
~index() = default;

/** Construct an empty index. */
index(raft::resources const& res)
index(raft::resources const& res,
raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded)
: ann::index(),
metric_(raft::distance::DistanceType::L2Expanded),
metric_(metric),
dataset_(make_device_matrix<T, int64_t>(res, 0, 0)),
graph_(make_device_matrix<IdxT, int64_t>(res, 0, 0))
{
Expand Down Expand Up @@ -296,7 +297,11 @@ struct index : ann::index {
raft::host_matrix_view<const IdxT, int64_t, row_major> knn_graph)
{
RAFT_LOG_DEBUG("Copying CAGRA knn graph from host to device");
graph_ = make_device_matrix<IdxT, int64_t>(res, knn_graph.extent(0), knn_graph.extent(1));
if ((graph_.extent(0) != knn_graph.extent(0)) || (graph_.extent(1) != knn_graph.extent(1))) {
// clear existing memory before allocating to prevent OOM errors on large graphs
if (graph_.size()) { graph_ = make_device_matrix<IdxT, int64_t>(res, 0, 0); }
graph_ = make_device_matrix<IdxT, int64_t>(res, knn_graph.extent(0), knn_graph.extent(1));
}
raft::copy(graph_.data_handle(),
knn_graph.data_handle(),
knn_graph.size(),
Expand All @@ -311,7 +316,13 @@ struct index : ann::index {
mdspan<const T, matrix_extent<int64_t>, row_major, data_accessor> dataset)
{
size_t padded_dim = round_up_safe<size_t>(dataset.extent(1) * sizeof(T), 16) / sizeof(T);
dataset_ = make_device_matrix<T, int64_t>(res, dataset.extent(0), padded_dim);

if ((dataset_.extent(0) != dataset.extent(0)) ||
(static_cast<size_t>(dataset_.extent(1)) != padded_dim)) {
// clear existing memory before allocating to prevent OOM errors on large datasets
if (dataset_.size()) { dataset_ = make_device_matrix<T, int64_t>(res, 0, 0); }
dataset_ = make_device_matrix<T, int64_t>(res, dataset.extent(0), padded_dim);
}
if (dataset_.extent(1) == dataset.extent(1)) {
raft::copy(dataset_.data_handle(),
dataset.data_handle(),
Expand Down
19 changes: 13 additions & 6 deletions cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,22 @@ auto deserialize(raft::resources const& res, std::istream& is) -> index<T, IdxT>
auto graph_degree = deserialize_scalar<std::uint32_t>(res, is);
auto metric = deserialize_scalar<raft::distance::DistanceType>(res, is);

auto dataset = raft::make_host_matrix<T, int64_t>(n_rows, dim);
auto graph = raft::make_host_matrix<IdxT, int64_t>(n_rows, graph_degree);
auto graph = raft::make_host_matrix<IdxT, int64_t>(n_rows, graph_degree);
deserialize_mdspan(res, is, graph.view());

bool has_dataset = deserialize_scalar<bool>(res, is);
if (has_dataset) { deserialize_mdspan(res, is, dataset.view()); }

return index<T, IdxT>(
res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view()));
if (has_dataset) {
auto dataset = raft::make_host_matrix<T, int64_t>(n_rows, dim);
deserialize_mdspan(res, is, dataset.view());
return index<T, IdxT>(
res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view()));
} else {
// create a new index with no dataset - the user must supply via update_dataset themselves
// later (this avoids allocating GPU memory in the meantime)
index<T, IdxT> idx(res, metric);
idx.update_graph(res, raft::make_const_mdspan(graph.view()));
return idx;
}
}

template <typename T, typename IdxT>
Expand Down
41 changes: 14 additions & 27 deletions python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,18 +254,18 @@ def create_plot_build(
xn = "k-nn"
yn = "qps"

# recall_85 = [-1] * len(linestyles)
qps_85 = [-1] * len(linestyles)
bt_85 = [0] * len(linestyles)
i_85 = [-1] * len(linestyles)
# recall_90 = [-1] * len(linestyles)

qps_90 = [-1] * len(linestyles)
bt_90 = [0] * len(linestyles)
i_90 = [-1] * len(linestyles)
# recall_95 = [-1] * len(linestyles)

qps_95 = [-1] * len(linestyles)
bt_95 = [0] * len(linestyles)
i_95 = [-1] * len(linestyles)

data = OrderedDict()
colors = OrderedDict()

Expand Down Expand Up @@ -303,7 +303,7 @@ def mean_y(algo):
plt.figure(figsize=(12, 9))
ax = df.plot.bar(rot=0, color=colors)
fig = ax.get_figure()
print(f"writing search output to {fn_out}")
print(f"writing build output to {fn_out}")
plt.title("Build Time for Highest QPS")
plt.suptitle(f"{dataset} k={k} batch_size={batch_size}")
plt.ylabel("Build Time (s)")
Expand All @@ -313,35 +313,22 @@ def mean_y(algo):
def load_lines(results_path, result_files, method, index_key):
results = dict()

linebreaker = "name,iterations"

for result_filename in result_files:
if result_filename.endswith(".csv"):
with open(os.path.join(results_path, result_filename), "r") as f:
lines = f.readlines()
lines = lines[:-1] if lines[-1] == "\n" else lines
idx = 0
for pos, line in enumerate(lines):
if linebreaker in line:
idx = pos
break

if method == "build":
if "hnswlib" in result_filename:
key_idx = [2]
else:
key_idx = [10]
key_idx = [2]
elif method == "search":
if "hnswlib" in result_filename:
key_idx = [10, 6]
else:
key_idx = [12, 10]
key_idx = [2, 3]

for line in lines[idx + 1 :]:
for line in lines[1:]:
split_lines = line.split(",")

algo_name = split_lines[0].split(".")[0].strip('"')
index_name = split_lines[0].split("/")[0].strip('"')
algo_name = split_lines[0]
index_name = split_lines[1]

if index_key == "algo":
dict_key = algo_name
Expand Down Expand Up @@ -394,9 +381,7 @@ def main():
)
parser.add_argument(
"--dataset-path",
help="path to dataset folder, by default will look in "
"RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets "
"subdirectory from the calling directory",
help="path to dataset folder",
default=default_dataset_path,
)
parser.add_argument(
Expand Down Expand Up @@ -460,10 +445,12 @@ def main():
search = args.search

search_output_filepath = os.path.join(
args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png"
args.output_filepath,
f"search-{args.dataset}-k{k}-batch_size{batch_size}.png",
)
build_output_filepath = os.path.join(
args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png"
args.output_filepath,
f"build-{args.dataset}-k{k}-batch_size{batch_size}.png",
)

search_results = load_all_results(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"name": "deep-image-96-inner",
"base_file": "deep-image-96-inner/base.fbin",
"query_file": "deep-image-96-inner/query.fbin",
"groundtruth_neighbors_file": "deep-image-96-inner/groundtruth.neighbors.ibin",
"distance": "euclidean"
},
"search_basic_param": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"name": "fashion-mnist-784-euclidean",
"base_file": "fashion-mnist-784-euclidean/base.fbin",
"query_file": "fashion-mnist-784-euclidean/query.fbin",
"groundtruth_neighbors_file": "fashion-mnist-784-euclidean/groundtruth.neighbors.ibin",
"distance": "euclidean"
},
"search_basic_param": {
Expand Down
Loading

0 comments on commit 41bb5f5

Please sign in to comment.