Skip to content

Commit

Permalink
minor
Browse files Browse the repository at this point in the history
  • Loading branch information
Ricardo J. Rodríguez committed Jul 24, 2024
2 parents 7bc022e + 0f1f6a9 commit 215b9b6
Show file tree
Hide file tree
Showing 14 changed files with 405 additions and 426 deletions.
362 changes: 343 additions & 19 deletions apotheosis.py

Large diffs are not rendered by default.

381 changes: 0 additions & 381 deletions apotheosis_winmodule.py

This file was deleted.

3 changes: 3 additions & 0 deletions common/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class NodeNotFoundError(Exception):
# Exception subclass that adds no behavior of its own.
# NOTE(review): presumably raised when a node being inserted is already
# present in the structure -- confirm against the raise sites.
class NodeAlreadyExistsError(Exception):
pass

# Exception subclass that adds no behavior of its own.
# Raised by WinModuleHashNode.create_node_from_DB when the requested hash
# algorithm is neither TLSHHashAlgorithm nor SSDEEPHashAlgorithm.
class NodeUnsupportedAlgorithm(Exception):
pass

# database-related errors
# Exception subclass that adds no behavior of its own.
# NOTE(review): presumably signals that a hash value was looked up in the
# database and not found -- confirm against the raise sites.
class HashValueNotInDBError(Exception):
pass
Expand Down
6 changes: 3 additions & 3 deletions common/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ def create_model(npages, nsearch_pages,\
M, ef, Mmax, Mmax0, heuristic, extend_candidates, keep_pruned_conns,\
distance_algorithm, beer_factor):

from apotheosis_winmodule import ApotheosisWinModule # avoid circular deps
print(f"[*] Building ApotheosisWinModule model ({M},{ef},{Mmax},{Mmax0}) from DB ... ")
current_model = ApotheosisWinModule(M=M, ef=ef, Mmax=Mmax, Mmax0=Mmax0,
from apotheosis import Apotheosis # avoid circular deps
print(f"[*] Building Apotheosis model ({M},{ef},{Mmax},{Mmax0}) from DB ... ")
current_model = Apotheosis(M=M, ef=ef, Mmax=Mmax, Mmax0=Mmax0,
heuristic=heuristic, extend_candidates=extend_candidates,
keep_pruned_conns=keep_pruned_conns, distance_algorithm=distance_algorithm,
beer_factor=beer_factor)
Expand Down
11 changes: 11 additions & 0 deletions datalayer/node/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def set_neighbors_at_layer(self, layer: int, neighbors: set):
except:
raise NodeLayerError

#TODO are these two methods necessary here? Too tightly coupled
# only in HashNode
# Stub: always raises NotImplementedError in this class.
# NOTE(review): the surrounding comments say a real implementation exists
# only in HashNode -- confirm there.
def calculate_similarity(self, other_node):
raise NotImplementedError
Expand All @@ -47,8 +48,18 @@ def get_pageids(self):
# Stub: always raises NotImplementedError in this class.
# WinModuleHashNode defines its own internal_serialize, which returns the
# node's internal page id encoded as bytes.
def internal_serialize(self):
raise NotImplementedError
# to be implemented in final classes
# Class-level stub: always raises NotImplementedError in this class.
# f is the object the serialized node data is read from; WinModuleHashNode's
# version calls f.read(I_SIZE) on it.
@classmethod
def internal_load(cls, f):
raise NotImplementedError
# to be implemented in final classes
# Class-level stub: always raises NotImplementedError in this class.
# NOTE(review): the third parameter is spelled `hash_algoritmh` here but
# `hash_algorithm` in WinModuleHashNode.create_node_from_DB; a caller passing
# it by keyword would not work against both -- consider aligning the names.
@classmethod
def create_node_from_DB(cls, db_manager, _id, hash_algoritmh):
raise NotImplementedError
# to be implemented in final classes
# Class-level stub: always raises NotImplementedError in this class.
# Implementations return a bool (WinModuleHashNode's version returns True,
# meaning data must be fetched from the DB when loading the node).
@classmethod
def internal_data_needs_DB(cls) -> bool:
raise NotImplementedError


def print_neighbors(self):
string = ""
Expand Down
24 changes: 23 additions & 1 deletion datalayer/node/winmodule_hash_node.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#TODO docstring
from datalayer.node.hash_node import HashNode
from datalayer.hash_algorithm.hash_algorithm import HashAlgorithm
from datalayer.hash_algorithm.tlsh_algorithm import TLSHHashAlgorithm
from datalayer.hash_algorithm.ssdeep_algorithm import SSDEEPHashAlgorithm
from datalayer.database.module import Module
from datalayer.database.page import Page
from common.constants import *
from common.errors import NodeUnsupportedAlgorithm

class WinModuleHashNode(HashNode):
def __init__(self, id, hash_algorithm: HashAlgorithm, module: Module=None, page: Page=None):
Expand Down Expand Up @@ -32,8 +35,27 @@ def get_draw_features(self):
# Serializes the node-specific data: the internal page id converted to
# I_SIZE bytes using BYTE_ORDER.
# NOTE(review): I_SIZE and BYTE_ORDER presumably come from the star import
# of common.constants -- confirm.
def internal_serialize(self):
return self.get_internal_page_id().to_bytes(I_SIZE, byteorder=BYTE_ORDER)

# Reads I_SIZE bytes from f and returns a tuple (raw_bytes, page_id), where
# page_id is the integer decoded from those bytes using BYTE_ORDER.
# NOTE(review): as rendered here, the `raise Exception` line precedes the
# read and would make it unreachable. It is presumably the single line this
# commit deleted from the file (the file header reports 1 deletion) --
# confirm against the repository before relying on this listing.
@classmethod
def internal_load(cls, f):
raise Exception
bpage_id = f.read(I_SIZE)
return bpage_id, int.from_bytes(bpage_id, byteorder=BYTE_ORDER)

# Builds a node for page `_id` from the database.
# Calls db_manager.get_winmodule_data_by_pageid(page_id=_id, algorithm=...)
# and then overwrites the returned node's _id with the page hash matching
# the algorithm: _page.hashTLSH for TLSHHashAlgorithm, _page.hashSSDEEP for
# SSDEEPHashAlgorithm. Returns that node.
# Raises NodeUnsupportedAlgorithm for any other algorithm; note the DB lookup
# has already been performed by the time the algorithm is checked.
# NOTE(review): hash_algorithm is compared with == against the algorithm
# classes, so callers presumably pass the class itself, not an instance --
# confirm against callers.
@classmethod
def create_node_from_DB(cls, db_manager, _id, hash_algorithm):
new_node = db_manager.get_winmodule_data_by_pageid(page_id=_id, algorithm=hash_algorithm)
if hash_algorithm == TLSHHashAlgorithm:
new_node._id = new_node._page.hashTLSH
elif hash_algorithm == SSDEEPHashAlgorithm:
new_node._id = new_node._page.hashSSDEEP
else:
raise NodeUnsupportedAlgorithm # algorithm not supported

return new_node

# Always returns True for this node class.
@classmethod
def internal_data_needs_DB(cls) -> bool:
return True # we have some data necessary to retrieve from the DB
# to load a WinModuleHashNode from an Apotheosis file

def is_equal(self, other):
if type(self) != type(other):
Expand Down
2 changes: 1 addition & 1 deletion datalayer/radix_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def insert(self, hash_node: HashNode):
"""

logging.info(f"Inserting \"{hash_node.get_id()}\" in the radix hash tree ... ")
self._root.insert(hash_node.get_id(), hash_node)
self._root.insert(str(hash_node.get_id()), hash_node)

def search(self, hash_value: str) -> (bool, HashNode):
"""Returns True and the associated hash node if the hash value is on the radix hash tree, (False, None) otherwise
Expand Down
6 changes: 3 additions & 3 deletions tests/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from apotheosis_winmodule import ApotheosisWinModule
from apotheosis import Apotheosis
from datalayer.hash_algorithm.tlsh_algorithm import TLSHHashAlgorithm
from datalayer.hash_algorithm.ssdeep_algorithm import SSDEEPHashAlgorithm
from common.errors import NodeAlreadyExistsError
Expand Down Expand Up @@ -99,7 +99,7 @@ def perform_benchmark(percentage, all_node_pages, hnsw_config, heuristic, distan
try:
n_pages = int(percentage * len(all_node_pages))
print("Benchmarking model ({}, {}, {}, {}) with {} pages".format(*hnsw_config, n_pages))
current_model = ApotheosisWinModule(M=hnsw_config[0], ef=hnsw_config[1],\
current_model = Apotheosis(M=hnsw_config[0], ef=hnsw_config[1],\
Mmax=hnsw_config[2], Mmax0=hnsw_config[3],\
heuristic=heuristic, extend_candidates=False, \
keep_pruned_conns=False,\
Expand Down Expand Up @@ -133,7 +133,7 @@ def _get_algorithm_instance(algorithm):
import os

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="ApotheosisWinModule benchmark")
parser = argparse.ArgumentParser(description="Apotheosis benchmark")
parser.add_argument('--distance-algorithm', '-da', required=True,
choices=['tlsh', 'ssdeep'],
help='Specify the hash algorithm (tlsh or ssdeep)')
Expand Down
6 changes: 3 additions & 3 deletions tests/draw_cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import common.utilities as util
from datalayer.db_manager import DBManager

from apotheosis_winmodule import ApotheosisWinModule
from apotheosis import Apotheosis
from datalayer.hash_algorithm.tlsh_algorithm import TLSHHashAlgorithm
from datalayer.hash_algorithm.ssdeep_algorithm import SSDEEPHashAlgorithm
from datalayer.node.winmodule_hash_node import WinModuleHashNode
Expand All @@ -16,8 +16,8 @@ def create_model(npages, M, ef, Mmax, Mmax0, heuristic, extend_candidates, keep_
print("[*] Getting DB pages ... ", end='')
all_pages, win_modules = dbManager.get_winmodules(distance_algorithm, npages)
print("done!")
print(f"[*] Building ApotheosisWinModule model ({M},{ef},{Mmax},{Mmax0}) ... ")
current_model = ApotheosisWinModule(M=M, ef=ef, Mmax=Mmax, Mmax0=Mmax0,
print(f"[*] Building Apotheosis model ({M},{ef},{Mmax},{Mmax0}) ... ")
current_model = Apotheosis(M=M, ef=ef, Mmax=Mmax, Mmax0=Mmax0,
distance_algorithm=distance_algorithm)
_page_list = []
for i in range(0, npages):
Expand Down
4 changes: 2 additions & 2 deletions tests/dump_load_WinModule_test.py → tests/dump_load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import common.utilities as util
from datalayer.db_manager import DBManager

from apotheosis_winmodule import ApotheosisWinModule
from apotheosis import Apotheosis
from datalayer.hash_algorithm.tlsh_algorithm import TLSHHashAlgorithm
from datalayer.hash_algorithm.ssdeep_algorithm import SSDEEPHashAlgorithm
from datalayer.node.winmodule_hash_node import WinModuleHashNode
Expand Down Expand Up @@ -41,7 +41,7 @@ def main():
current_model.dump(filename)
print(f"[*] Loading from \"{filename}\" ...")
db_manager = DBManager()
model = ApotheosisWinModule.load(filename, distance_algorithm=algorithm)
model = Apotheosis.load(filename, distance_algorithm=algorithm, hash_node_class=WinModuleHashNode)
equal = current_model == model
if not equal:
breakpoint()
Expand Down
12 changes: 6 additions & 6 deletions tests/regression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from apotheosis_winmodule import ApotheosisWinModule
from apotheosis import Apotheosis
import common.utilities as utils

from pytest_regressions.data_regression import DataRegressionFixture
Expand Down Expand Up @@ -34,10 +34,10 @@ def create_dict_result(found, node, result_dict):
return result

def test_insertion(data_regression: DataRegressionFixture):
apo_model = ApotheosisWinModule(M=64, ef=32, Mmax=64, Mmax0=128,
apo_model = Apotheosis(M=64, ef=32, Mmax=64, Mmax0=128,
distance_algorithm=TLSHHashAlgorithm)

print("[*] Building ApotheosisWinModule with TLSH ...")
print("[*] Building Apotheosis with TLSH ...")
for hash in HASHES:
apo_model.insert(HashNode(hash, TLSHHashAlgorithm))

Expand All @@ -49,7 +49,7 @@ def test_insertion(data_regression: DataRegressionFixture):
data_regression.check(result_dict)

def test_search(data_regression: DataRegressionFixture):
apo_model = ApotheosisWinModule(M=64, ef=32, Mmax=64, Mmax0=128,
apo_model = Apotheosis(M=64, ef=32, Mmax=64, Mmax0=128,
distance_algorithm=TLSHHashAlgorithm)

for hash in HASHES:
Expand All @@ -64,7 +64,7 @@ def test_search(data_regression: DataRegressionFixture):
data_regression.check(search_results)

def test_search_threshold(data_regression: DataRegressionFixture):
apo_model = ApotheosisWinModule(M=64, ef=32, Mmax=64, Mmax0=128,
apo_model = Apotheosis(M=64, ef=32, Mmax=64, Mmax0=128,
distance_algorithm=TLSHHashAlgorithm)

for hash in HASHES:
Expand All @@ -80,7 +80,7 @@ def test_search_threshold(data_regression: DataRegressionFixture):


def test_deletion(data_regression: DataRegressionFixture):
apo_model = ApotheosisWinModule(M=64, ef=32, Mmax=64, Mmax0=128,
apo_model = Apotheosis(M=64, ef=32, Mmax=64, Mmax0=128,
distance_algorithm=TLSHHashAlgorithm)

for hash in HASHES:
Expand Down
6 changes: 3 additions & 3 deletions tests/search_insert_times_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from common import utilities as util
from datalayer.db_manager import DBManager

from apotheosis_winmodule import ApotheosisWinModule
from apotheosis import Apotheosis
from datalayer.hash_algorithm.tlsh_algorithm import TLSHHashAlgorithm
from datalayer.hash_algorithm.ssdeep_algorithm import SSDEEPHashAlgorithm
from datalayer.node.winmodule_hash_node import WinModuleHashNode
Expand All @@ -34,7 +34,7 @@ def search(pages_search, current_model, search_recall):
def main():
parser = util.configure_argparse()
parser.add_argument('-recall', '--search-recall', type=int, default=4, help="Search recall (default=4)")
parser.add_argument('-dump', '--dump-file', type=str, help="Filename to dump ApotheosisWinModule data structure")
parser.add_argument('-dump', '--dump-file', type=str, help="Filename to dump Apotheosis data structure")
parser.add_argument('-np', '--npages', type=int, default=1000, help="Number of pages to test (default=1000)")
parser.add_argument('-ns', '--nsearch-pages', type=int, default=0, help='Number of pages to search from outside the model (using HNSW)')
args = parser.parse_args()
Expand Down Expand Up @@ -75,7 +75,7 @@ def main():
print(f"[*] Dumping to \"{filename}\" ...")
current_model.dump(filename)
print(f"[*] Loading from \"{filename}\" ...")
model = ApotheosisWinModule.load(filename, distance_algorithm=algorithm, db_manager=db_manager)
model = Apotheosis.load(filename, distance_algorithm=algorithm, db_manager=db_manager)
equal = current_model == model
if not equal:
breakpoint()
Expand Down
6 changes: 3 additions & 3 deletions tests/search_modules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import common.utilities as util
from datalayer.db_manager import DBManager

from apotheosis_winmodule import ApotheosisWinModule
from apotheosis import Apotheosis
from datalayer.hash_algorithm.tlsh_algorithm import TLSHHashAlgorithm
from datalayer.hash_algorithm.ssdeep_algorithm import SSDEEPHashAlgorithm
from datalayer.node.winmodule_hash_node import WinModuleHashNode
Expand All @@ -17,8 +17,8 @@ def create_model(modules_of_interest, npages, M, ef, Mmax, Mmax0, heuristic, ext
print("[*] Getting DB pages ... ", end='')
all_pages, modules = dbManager.get_winmodules(distance_algorithm, limit=npages, modules_of_interest=modules_of_interest)
print("done!")
print(f"[*] Building ApotheosisWinModule model ({M},{ef},{Mmax},{Mmax0}) ... ")
current_model = ApotheosisWinModule(M=M, ef=ef, Mmax=Mmax, Mmax0=Mmax0,
print(f"[*] Building Apotheosis model ({M},{ef},{Mmax},{Mmax0}) ... ")
current_model = Apotheosis(M=M, ef=ef, Mmax=Mmax, Mmax0=Mmax0,
distance_algorithm=distance_algorithm)
page_list = []
for i in range(0, npages):
Expand Down
2 changes: 1 addition & 1 deletion tests/sensitivity_search_insert_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def run_search_insert_test(M: int=4, ef: int=4, Mmax: int=16,\

if __name__ == '__main__':
parser = util.configure_argparse()
parser.add_argument('-dump', '--dump-file', type=str, help="Filename to dump ApotheosisWinModule data structure")
parser.add_argument('-dump', '--dump-file', type=str, help="Filename to dump Apotheosis data structure")
parser.add_argument('-recall', '--search-recall', type=int, default=4, help="Search recall (default=4)")
parser.add_argument('--npages', type=int, default=1000, help="Number of pages to test (default=1000)")
parser.add_argument('--nsearch-pages', type=int, default=0, help="Number of pages to search (default=0)")
Expand Down

0 comments on commit 215b9b6

Please sign in to comment.