Skip to content

Commit

Permalink
Fix and test for #3
Browse files Browse the repository at this point in the history
  • Loading branch information
RyanMarcus committed Aug 20, 2020
1 parent 2313560 commit 53e0eb7
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 90 deletions.
179 changes: 89 additions & 90 deletions rmi_lib/src/models/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,125 +4,124 @@
// See root directory of this project for license terms.
//
// < end copyright >

use crate::models::*;
use superslice::*;
use log::*;


/// Returns the largest `n` such that `2^n - 1 <= largest_target`, i.e.
/// `floor(log2(largest_target + 1))`.
///
/// The value is computed in closed form rather than by repeatedly evaluating
/// `1 << (n + 1)`, which overflows the shift once `largest_target` reaches
/// `2^63 - 1`.
///
/// # Panics
///
/// Panics if `largest_target` is `0`, because the result would be zero bits.
pub fn num_bits(largest_target: u64) -> u8 {
    let nbits = match largest_target.checked_add(1) {
        // floor(log2(v)) for a non-zero v is 63 minus its leading zero count.
        Some(v) => 63 - v.leading_zeros(),
        // largest_target == u64::MAX: every one of the 64 bits is usable.
        None => 64,
    };
    assert!(nbits >= 1);

    // nbits is at most 64 here, so the narrowing cast cannot truncate.
    nbits as u8
}

/// Returns the number of leading (most significant) bits that are identical
/// across the integer form of every key yielded by `data.iter_model_input()`.
///
/// An input that yields no keys gives `0`; an input whose keys are all equal
/// gives `64`.
pub fn common_prefix_size<T: TrainingKey>(data: &RMITrainingData<T>) -> u8 {
    // Bit i of `any_ones` is set if some key has bit i set;
    // bit i of `all_ones` is set only if every key has bit i set.
    let mut any_ones: u64 = 0;
    let mut all_ones: u64 = !0;

    for (x, _y) in data.iter_model_input() {
        let key = x.as_int();
        any_ones |= key;
        all_ones &= key;
    }

    // The OR and the AND disagree exactly on the bit positions where the keys
    // do not all agree, so the leading zeros of their XOR form the common prefix.
    (any_ones ^ all_ones).leading_zeros() as u8
}

/// Returns the number of leading (most significant) bits shared by every
/// value in `data`.
///
/// An empty slice gives `0`; a slice whose values are all equal gives `64`.
fn common_prefix_size2(data: &[u64]) -> u8 {
    // Accumulate the bitwise OR and the bitwise AND of all values in one pass.
    let (any_ones, all_ones) = data
        .iter()
        .fold((0u64, !0u64), |(any, all), &x| (any | x, all & x));

    // The OR and the AND disagree exactly on the bit positions where the
    // values do not all agree, so the leading zeros of their XOR form the
    // common prefix.
    (any_ones ^ all_ones).leading_zeros() as u8
}



pub fn radix_index(points: &[u64], num_bits: u8) -> Vec<u64> {
// build the radix index
let cps = common_prefix_size2(points);
if cps != 0 {
warn!("Radix index currently assumes the common prefix size is 0, but it was {}",
cps);
}

let mut radix_index: Vec<u64> = vec![0 ; 1 << num_bits];

let mut last_radix = 0;
for (idx, p) in points.iter().enumerate() {
let radix = p >> (64 - num_bits);
assert!(radix < radix_index.len() as u64);

if radix == last_radix { continue; }

for i in last_radix+1..radix {
radix_index[i as usize] = idx as u64; //radix_index[last_radix as usize] + 1;
}
radix_index[radix as usize] = idx as u64;
last_radix = radix;
}

for i in last_radix+1..radix_index.len() as u64 {
radix_index[i as usize] = points.len() as u64;
}

// end point
radix_index.push(points.len() as u64);
// build the radix index
let cps = common_prefix_size2(points);
if cps != 0 {
warn!("Radix index currently assumes the common prefix size is 0, but it was {}",
cps);
}

let mut radix_index: Vec<u64> = vec![0 ; 1 << num_bits];

let mut last_radix = 0;
for (idx, p) in points.iter().enumerate() {
let radix = p >> (64 - num_bits);
assert!(radix < radix_index.len() as u64);

if radix == last_radix { continue; }

// verify the radix construction
for p in points {
let radix = p >> (64 - num_bits);
let radix_lb = radix_index[radix as usize];
let radix_ub = radix_index[radix as usize + 1];

let correct_idx = (points.upper_bound(p) - 1) as u64;
assert!(radix_lb <= correct_idx,
"On key {} with radix {}, correct index was {}, but radix LB = {} and UB = {}",
p, radix, correct_idx, radix_lb, radix_ub);
assert!(radix_ub > correct_idx,
"On key {} with radix {}, correct index was {}, but radix LB = {} and UB = {}",
p, radix, correct_idx, radix_lb, radix_ub);
for i in last_radix+1..radix {
radix_index[i as usize] = idx as u64; //radix_index[last_radix as usize] + 1;
}

return radix_index;
radix_index[radix as usize] = idx as u64;
last_radix = radix;
}

for i in last_radix+1..radix_index.len() as u64 {
radix_index[i as usize] = points.len() as u64;
}

// end point
radix_index.push(points.len() as u64);

// verify the radix construction
for p in points {
let radix = p >> (64 - num_bits);
let radix_lb = radix_index[radix as usize];
let radix_ub = radix_index[radix as usize + 1];

let correct_idx = (points.upper_bound(p) - 1) as u64;
assert!(radix_lb <= correct_idx,
"On key {} with radix {}, correct index was {}, but radix LB = {} and UB = {}",
p, radix, correct_idx, radix_lb, radix_ub);
assert!(radix_ub > correct_idx,
"On key {} with radix {}, correct index was {}, but radix LB = {} and UB = {}",
p, radix, correct_idx, radix_lb, radix_ub);
}

return radix_index;
}



#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): these two tests build `ModelData::IntKeyToIntPos`, while
    // `common_prefix_size` is declared to take `&RMITrainingData<T>` --
    // confirm the two types are compatible.

    /// Keys 1, 4 and 8 differ only within their lowest four bits.
    #[test]
    fn test_common_prefix1() {
        let data = ModelData::IntKeyToIntPos(vec![
            (1, 0), (4, 4), (8, 8)
        ]);

        assert_eq!(common_prefix_size(&data), 64-4);
    }

    /// Keys 1, 8, 9 and 12 differ only within their lowest four bits.
    #[test]
    fn test_common_prefix2() {
        let data = ModelData::IntKeyToIntPos(vec![
            (1, 0), (8, 1), (9, 4), (12, 8)
        ]);

        assert_eq!(common_prefix_size(&data), 64-4);
    }

    /// The slice-based variant agrees on the same key sets.
    #[test]
    fn test_common_prefix_slice() {
        assert_eq!(common_prefix_size2(&[1, 4, 8]), 64-4);
        assert_eq!(common_prefix_size2(&[1, 8, 9, 12]), 64-4);
    }
}
4 changes: 4 additions & 0 deletions tests/radix_model_wiki/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
rmi*
test
stdout
result
16 changes: 16 additions & 0 deletions tests/radix_model_wiki/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

# Run the test binary and record its exit status on the first line of
# `result`, followed by everything it printed. The status is captured by the
# shell itself so the rule does not depend on GNU make's .SHELLSTATUS.
result: test
	./test > stdout; echo $$? > result
	cat stdout >> result

# Generate the RMI sources from the dataset using the rmi binary one
# directory up.
rmi.cpp: ../rmi
	../rmi ../wiki_ts_200M_uint64 rmi radix,linear 1024

# -lstdc++fs is required for ancient G++s
test: main.cpp rmi.cpp
	g++ -std=c++17 -Wall -O3 -ffast-math -march=native main.cpp rmi.cpp -o test -lstdc++fs

.PHONY: clean
clean:
	rm -rf test result stdout rmi*
46 changes: 46 additions & 0 deletions tests/radix_model_wiki/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include "rmi.h"

// Loads the key file, loads the RMI, and checks every key: the position
// guessed by rmi::lookup must be within the reported error bound of the
// key's true lower-bound position. Exits with 0 on success and -1 on the
// first violation or on any failure to read the input.
int main() {
  const char* data_path = "../wiki_ts_200M_uint64";

  // load the data
  std::vector<uint64_t> data;
  std::ifstream in(data_path, std::ios::binary);
  if (!in) {
    std::cerr << "Could not open " << data_path << std::endl;
    return -1;
  }

  // Read size. Without this check a failed read would leave `size`
  // indeterminate and the resize below would use garbage.
  uint64_t size = 0;
  if (!in.read(reinterpret_cast<char*>(&size), sizeof(uint64_t))) {
    std::cerr << "Could not read the key count from " << data_path << std::endl;
    return -1;
  }
  data.resize(size);

  // Read values.
  if (!in.read(reinterpret_cast<char*>(data.data()), size*sizeof(uint64_t))) {
    std::cerr << "Could not read " << size << " keys from " << data_path << std::endl;
    return -1;
  }
  in.close();

  std::cout << "Data loaded." << std::endl;

  std::cout << "RMI status: " << rmi::load("rmi_data") << std::endl;

  // Filled in by rmi::lookup on every call.
  size_t err = 0;

  for (uint64_t key_index = 0; key_index < size; key_index++) {
    uint64_t lookup = data[key_index];
    // Position of the first element not less than the key.
    // NOTE(review): lower_bound presumes `data` is sorted -- confirm for the dataset.
    uint64_t true_index = (uint64_t)
      std::distance(data.begin(), std::lower_bound(data.begin(),
                                                   data.end(),
                                                   lookup));
    uint64_t rmi_guess = rmi::lookup(lookup, &err);

    // Absolute difference, computed without unsigned wrap-around.
    uint64_t diff = (rmi_guess > true_index ? rmi_guess - true_index : true_index - rmi_guess);
    if (diff > err) {
      std::cout << "Search key: " << lookup
                << " Key at " << true_index << ": " << data[true_index]
                << " RMI guess: " << rmi_guess << " +/- " << err
                << " diff: " << diff << std::endl;
      return -1;
    }
  }

  rmi::cleanup();
  return 0;
}

0 comments on commit 53e0eb7

Please sign in to comment.