diff --git a/README.md b/README.md index 538c33cd..6907ec4b 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,9 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Downloads per month](https://img.shields.io/pypi/dm/adbnx-adapter)](https://pypi.org/project/adbnx-adapter/) -
- - - - -
-

+![](https://raw.githubusercontent.com/arangoml/networkx-adapter/1.0.0/examples/assets/logos/ArangoDB_logo.png) +![](https://raw.githubusercontent.com/arangoml/networkx-adapter/1.0.0/examples/assets/logos/networkx_logo.svg) The ArangoDB-Networkx Adapter exports Graphs from ArangoDB, a multi-model Graph Database, into NetworkX, the swiss army knife for graph analysis with python, and vice-versa. @@ -31,6 +26,43 @@ Networkx is a commonly used tool for analysis of network-data. If your analytics Get Started on Colab: Open In Colab + +```py +import networkx as nx +from adbnx_adapter.adbnx_adapter import ArangoDB_Networkx_Adapter + +con = { + "hostname": "localhost", + "protocol": "http", + "port": 8529, + "username": "root", + "password": "rootpassword", + "dbName": "_system", +} + +adbnx_adapter = ArangoDB_Networkx_Adapter(con) + +# (Assume ArangoDB fraud-detection data dump is imported) + +fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph("fraud-detection") +fraud_nx_g_2 = adbnx_adapter.create_networkx_graph_from_arangodb_collections( + "fraud-detection", + {"account", "bank", "branch", "Class", "customer"}, + {"accountHolder", "Relationship", "transaction"} +) + + +grid_nx_g = nx.grid_2d_graph(5, 5) +grid_edge_definitions = [ + { + "edge_collection": "to", + "from_vertex_collections": ["Grid_Node"], + "to_vertex_collections": ["Grid_Node"], + } +] +adb_g = adbnx_adapter.create_arangodb_graph("Grid", grid_nx_g, grid_edge_definitions) +``` + ## Development & Testing Prerequisite: `arangorestore` must be installed diff --git a/adbnx_adapter/adbnx_adapter/adbnx_adapter.py b/adbnx_adapter/adbnx_adapter/adbnx_adapter.py index 163a4c27..a6b2507f 100644 --- a/adbnx_adapter/adbnx_adapter/adbnx_adapter.py +++ b/adbnx_adapter/adbnx_adapter/adbnx_adapter.py @@ -15,14 +15,17 @@ from arango import ArangoClient from networkx.classes.graph import Graph as NetworkXGraph -try: # Python +3.8 - from typing import final -except ImportError: # Python 3.6, 3.7 - from 
overrides import final - class ArangoDB_Networkx_Adapter(ADBNX_Adapter): - @final + """ArangoDB-NetworkX adapter. + + :param conn: Connection details to an ArangoDB instance. + :type conn: dict + :param controller_class: The ArangoDB-NetworkX controller, used to identify, keyify and prepare nodes & edges before insertion, optionally re-defined by the user if needed (otherwise defaults to Base_ADBNX_Controller). + :type controller_class: Base_ADBNX_Controller + :raise ValueError: If missing required keys in conn + """ + def __init__( self, conn: dict, @@ -46,10 +49,38 @@ def __init__( self.cntrl: Base_ADBNX_Controller = controller_class() - @final def create_networkx_graph( - self, name: str, graph_attributes, is_keep=True, **query_options + self, name: str, graph_attributes: dict, is_keep=True, **query_options ): + """Create a NetworkX graph from graph attributes. + + :param name: The NetworkX graph name. + :type name: str + :param graph_attributes: An object defining vertex & edge collections to import to NetworkX, along with their associated attributes to keep. + :type graph_attributes: dict + :param is_keep: Only keep the document attributes specified in **graph_attributes** when importing to NetworkX (is True by default). Otherwise, all document attributes are included. + :type is_keep: bool + :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. + :type query_options: **kwargs + :return: A Multi-Directed NetworkX Graph. + :rtype: networkx.classes.multidigraph.MultiDiGraph + :raise ValueError: If missing required keys in graph_attributes + + Here is an example entry for parameter **graph_attributes**: + + .. 
code-block:: python + { + "vertexCollections": { + "account": {"Balance", "account_type", "customer_id", "rank"}, + "bank": {"Country", "Id", "bank_id", "bank_name"}, + "customer": {"Name", "Sex", "Ssn", "rank"}, + }, + "edgeCollections": { + "accountHolder": {"_from", "_to"}, + "transaction": {"_from", "_to"}, + }, + } + """ self.__validate_attributes("graph", set(graph_attributes), self.GRAPH_ATRIBS) self.cntrl.nx_graph = nx.MultiDiGraph(name=name) @@ -65,7 +96,6 @@ def create_networkx_graph( print(f"NetworkX: {name} created") return self.cntrl.nx_graph - @final def create_networkx_graph_from_arangodb_collections( self, name: str, @@ -73,6 +103,19 @@ def create_networkx_graph_from_arangodb_collections( edge_collections: set, **query_options, ): + """Create a NetworkX graph from ArangoDB collections. + + :param name: The NetworkX graph name. + :type name: str + :param vertex_collections: A set of ArangoDB vertex collections to import to NetworkX. + :type vertex_collections: set + :param edge_collections: A set of ArangoDB edge collections to import to NetworkX. + :type edge_collections: set + :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. + :type query_options: **kwargs + :return: A Multi-Directed NetworkX Graph. + :rtype: networkx.classes.multidigraph.MultiDiGraph + """ graph_attributes = { "vertexCollections": {col: {} for col in vertex_collections}, "edgeCollections": {col: {} for col in edge_collections}, @@ -82,8 +125,17 @@ def create_networkx_graph_from_arangodb_collections( name, graph_attributes, is_keep=False, **query_options ) - @final def create_networkx_graph_from_arangodb_graph(self, name: str, **query_options): + """Create a NetworkX graph from an ArangoDB graph. + + :param name: The ArangoDB graph name. + :type name: str + :param vertex_collections: A set of ArangoDB vertex collections to import to NetworkX. 
+ :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. + :type query_options: **kwargs + :return: A Multi-Directed NetworkX Graph. + :rtype: networkx.classes.multidigraph.MultiDiGraph + """ arango_graph = self.db.graph(name) v_cols = arango_graph.vertex_collections() e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()} @@ -92,7 +144,6 @@ def create_networkx_graph_from_arangodb_graph(self, name: str, **query_options): name, v_cols, e_cols, **query_options ) - @final def create_arangodb_graph( self, name: str, @@ -101,20 +152,29 @@ def create_arangodb_graph( overwrite: bool = False, keyify_edges: bool = False, ): - """ + """Create an ArangoDB graph from a NetworkX graph, and a set of edge definitions. + + :param name: The ArangoDB graph name. + :type name: str + :param original_nx_graph: The existing NetworkX graph. + :type original_nx_graph: networkx.classes.graph.Graph + :param edge_definitions: List of edge definitions, where each edge definition entry is a dictionary with fields "edge_collection", "from_vertex_collections" and "to_vertex_collections" (see below for example). + :type edge_definitions: list[dict] + :param overwrite: If set to True, overwrites existing ArangoDB collections with the NetworkX graph data. Otherwise, will not remove existing data from collections specified in **edge_definitions**. + :type overwrite: bool + :param keyify_edges: If set to True, will create custom edge IDs based on the behavior of the ADBNX_Controller's _keyify_nx_edge() method. Otherwise, edge IDs will be randomly generated. + :type keyify_edges: bool + :return: The ArangoDB Graph API wrapper. + :rtype: arango.graph.Graph + Here is an example entry for parameter **edge_definitions**: .. 
code-block:: python [ { - 'edge_collection': 'teaches', - 'from_vertex_collections': ['person'], - 'to_vertex_collections': ['lecture'] - }, - { - 'edge_collection': 'attends', - 'from_vertex_collections': ['person'], - 'to_vertex_collections': ['lecture'] + 'edge_collection': 'teach', + 'from_vertex_collections': ['teachers'], + 'to_vertex_collections': ['lectures'] } ] """ @@ -165,16 +225,37 @@ def create_arangodb_graph( print(f"ArangoDB: {name} created") return self.cntrl.adb_graph - @final def __validate_attributes(self, type: str, attributes: set, valid_attributes: set): + """Validates that a set of attributes includes the required valid attributes. + + :param type: The context of the attribute validation (e.g connection attributes, graph attributes, etc). + :type type: str + :param attributes: The provided attributes, possibly invalid. + :type attributes: set + :param valid_attributes: The valid attributes. + :type valid_attributes: set + :raise ValueError: If **valid_attributes** is not a subset of **attributes** + """ if valid_attributes.issubset(attributes) is False: missing_attributes = valid_attributes - attributes raise ValueError(f"Missing {type} attributes: {missing_attributes}") - @final def __fetch_arangodb_docs( self, col: str, attributes: set, is_keep: bool, query_options: dict ): + """Fetches ArangoDB documents within a collection. + + :param col: The ArangoDB collection. + :type col: str + :param attributes: The set of document attributes. + :type attributes: set + :param is_keep: Only keep the document attributes specified in **attributes** when returning the document. Otherwise, all document attributes are included. + :type is_keep: bool + :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance. + :type query_options: **kwargs + :return: Result cursor. + :rtype: arango.cursor.Cursor + """ aql = f""" FOR doc IN {col} RETURN {is_keep} ? 
@@ -183,30 +264,68 @@ def __fetch_arangodb_docs( return self.db.aql.execute(aql, **query_options) - @final def __insert_networkx_node(self, adb_id: str, node: dict, col: str): + """Insert a NetworkX node into the NetworkX graph. + + :param adb_id: The ArangoDB ID of the node. + :type adb_id: str + :param node: The node object to insert. + :type node: dict + :param col: The ArangoDB collection it came from. + :type col: str + """ nx_id = self.cntrl._prepare_adb_vertex(node, col) self.cntrl.nx_map[adb_id] = {"_id": nx_id, "collection": col} self.cntrl.nx_graph.add_node(nx_id, **node) - @final def __insert_networkx_edge(self, edge: dict, col: str): + """Insert a NetworkX edge into the NetworkX graph. + + :param edge: The edge object to insert. + :type edge: dict + :param col: The ArangoDB collection it came from. + :type col: str + """ from_node_id = self.cntrl.nx_map.get(edge["_from"])["_id"] to_node_id = self.cntrl.nx_map.get(edge["_to"])["_id"] self.cntrl._prepare_adb_edge(edge, col) self.cntrl.nx_graph.add_edge(from_node_id, to_node_id, **edge) - @final def __insert_arangodb_vertex(self, id, v: dict, col: str, key: str, ow: bool): + """Insert an ArangoDB vertex into an ArangoDB collection. + + :param id: The NetworkX ID of the vertex. + :type id: Any + :param v: The vertex object to insert. + :type v: dict + :param col: The ArangoDB collection the vertex belongs to. + :type col: str + :param key: The _key value of the vertex. + :type key: str + :param ow: If set to True, overwrite existing document with matching _id. + :type ow: bool + """ self.cntrl.adb_map[id] = {"_id": v["_id"], "collection": col, "key": key} self.db.collection(col).insert(v, overwrite=ow, silent=True) - @final def __insert_arangodb_edge( self, edge: dict, from_node: dict, to_node: dict, col: str, ow: bool ): + """Insert an ArangoDB edge into an ArangoDB collection. + + :param edge: The edge object to insert. 
+ :type edge: dict + :param from_node: The NetworkX node object representing the edge source. + :type from_node: dict + :param to_node: The NetworkX node object representing the edge destination. + :type to_node: dict + :param col: The ArangoDB collection the edge belongs to. + :type col: str + :param ow: If set to True, overwrite existing document with matching _id. + :type ow: bool + """ edge["_from"] = self.cntrl.adb_map.get(from_node["id"])["_id"] edge["_to"] = self.cntrl.adb_map.get(to_node["id"])["_id"] self.db.collection(col).insert(edge, overwrite=ow, silent=True) diff --git a/adbnx_adapter/adbnx_adapter/adbnx_controller.py b/adbnx_adapter/adbnx_adapter/adbnx_controller.py index 88c9f6ab..2f222faf 100644 --- a/adbnx_adapter/adbnx_adapter/adbnx_controller.py +++ b/adbnx_adapter/adbnx_adapter/adbnx_controller.py @@ -3,45 +3,67 @@ from arango.graph import Graph as ArangoDBGraph from networkx.classes.graph import Graph as NetworkXGraph -try: # Python +3.8 - from typing import final -except ImportError: # Python 3.6, 3.7 - from overrides import final - class Base_ADBNX_Controller(ADBNX_Controller): + """ArangoDB-NetworkX controller. + + Responsible for controlling how nodes & edges are handled when + transitioning from ArangoDB to NetworkX, and vice-versa. + """ + def __init__(self): self.nx_graph: NetworkXGraph = None self.nx_map = dict() # Maps ArangoDB vertex IDs to NetworkX node IDs self.adb_graph: ArangoDBGraph = None - self.adb_map = dict() # Maps NetworkX node IDs to ArangoDB vertex IDs def _prepare_adb_vertex(self, vertex: dict, collection: str): - """ + """Prepare an ArangoDB vertex before it gets inserted into the NetworkX graph. + Given an ArangoDB vertex, you can modify it before it gets inserted into the NetworkX graph, and/or derive a custom node id for networkx to use. In most cases, it is only required to return the ArangoDB _id of the vertex. + + :param vertex: The ArangoDB vertex object to (optionally) modify. 
+ :type vertex: dict + :param collection: The ArangoDB collection the vertex belongs to. + :type collection: str + :return: The ArangoDB _id attribute of the vertex. + :rtype: str """ return vertex["_id"] def _prepare_adb_edge(self, edge: dict, collection: str): - """ + """Prepare an ArangoDB edge before it gets inserted into the NetworkX graph. + Given an ArangoDB edge, you can modify it before it gets inserted into the NetworkX graph. In most cases, no action is needed. + + :param edge: The ArangoDB edge object to (optionally) modify. + :type edge: dict + :param collection: The ArangoDB collection the edge belongs to. + :type collection: str """ pass def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str: - """ - Given a NetworkX node, identify what ArangoDB collection should it belong to. + """Given a NetworkX node, identify what ArangoDB collection it should belong to. NOTE: If your NetworkX graph does not comply to ArangoDB standards (i.e a node's ID is not "collection/key"), then you must override this function. + + :param id: The NetworkX ID of the node. + :type id: Any + :param node: The NetworkX node object. + :type node: dict + :param overwrite: Whether overwrite is enabled or not. + :type overwrite: bool + :return: The ArangoDB collection name + :rtype: str """ # In this case, id is already a valid ArangoDB _id adb_id: str = id @@ -50,23 +72,40 @@ def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str: def _identify_nx_edge( self, edge: dict, from_node: dict, to_node: dict, overwrite: bool ) -> str: - """ - Given a NetworkX edge, its pair of nodes, and the overwrite boolean, - identify what ArangoDB collection should it belong to. + """Given a NetworkX edge, its pair of nodes, and the overwrite boolean, identify what ArangoDB collection it should belong to. NOTE: If your NetworkX graph does not comply to ArangoDB standards (i.e a node's ID is not "collection/key"), then you must override this function. 
+ + :param edge: The NetworkX edge object. + :type edge: dict + :param from_node: The NetworkX node object representing the edge source. + :type from_node: dict + :param to_node: The NetworkX node object representing the edge destination. + :type to_node: dict + :param overwrite: Whether overwrite is enabled or not. + :type overwrite: bool + :return: The ArangoDB collection name + :rtype: str """ # In this case, edge["_id"] is already a valid ArangoDB _id edge_id: str = edge["_id"] return edge_id.split("/")[0] + ("" if overwrite else "_nx") def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str: - """ - Given a NetworkX node, derive its valid ArangoDB key. + """Given a NetworkX node, derive its valid ArangoDB key. NOTE: If your NetworkX graph does not comply to ArangoDB standards (i.e a node's ID is not "collection/key"), then you must override this function. + + :param node: The NetworkX node object. + :type node: dict + :param collection: The ArangoDB collection the node belongs to. + :type collection: str + :param overwrite: Whether overwrite is enabled or not. + :type overwrite: bool + :return: A valid ArangoDB _key value. + :rtype: str """ # In this case, id is already a valid ArangoDB _id adb_id: str = id @@ -80,21 +119,36 @@ def _keyify_nx_edge( collection: str, overwrite: bool, ): - """ - Given a NetworkX edge, its collection, its pair of nodes, and the overwrite boolean, - derive its valid ArangoDB key. + """Given a NetworkX edge, its collection, its pair of nodes, and the overwrite boolean, + derive its valid ArangoDB key. NOTE: If your NetworkX graph does not comply to ArangoDB standards (i.e a node's ID is not "collection/key"), then you must override this function. + + :param edge: The NetworkX edge object. + :type edge: dict + :param from_node: The NetworkX node object representing the edge source. + :type from_node: dict + :param to_node: The NetworkX node object representing the edge destination. 
+ :type to_node: dict + :param collection: The ArangoDB collection the edge belongs to. + :type collection: str + :param overwrite: Whether overwrite is enabled or not. + :type overwrite: bool + :return: A valid ArangoDB _key value. + :rtype: str """ # In this case, edge["_id"] is already a valid ArangoDB _id edge_id: str = edge["_id"] return edge_id.split("/")[1] - @final def _string_to_arangodb_key_helper(self, string: str) -> str: - """ - Given a string, derive a valid ArangoDB _key string. + """Given a string, derive a valid ArangoDB _key string. + + :param string: A (possibly) invalid _key string value. + :type string: str + :return: A valid ArangoDB _key value. + :rtype: str """ res = "" for s in string: @@ -103,10 +157,13 @@ def _string_to_arangodb_key_helper(self, string: str) -> str: return res - @final def _tuple_to_arangodb_key_helper(self, tup: tuple) -> str: - """ - Given a tuple, derive a valid ArangoDB _key string. + """Given a tuple, derive a valid ArangoDB _key string. + + :param tup: A tuple with non-None values. + :type tup: tuple + :return: A valid ArangoDB _key value. 
+ :rtype: str """ string = "".join(map(str, tup)) return self._string_to_arangodb_key_helper(string) diff --git a/adbnx_adapter/requirements.txt b/adbnx_adapter/requirements.txt deleted file mode 100644 index 157a50ee..00000000 --- a/adbnx_adapter/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -networkx==2.6.3 -python-arango==7.2.0 -pytest==6.2.5 -overrides==6.1.0 \ No newline at end of file diff --git a/adbnx_adapter/setup.py b/adbnx_adapter/setup.py index 0186c5f0..6d253f9b 100644 --- a/adbnx_adapter/setup.py +++ b/adbnx_adapter/setup.py @@ -1,18 +1,13 @@ -import pathlib from setuptools import setup -# The directory containing this file -HERE = pathlib.Path(__file__).resolve().parents[1] - with open("../README.md", "r") as f: long_description = f.read() -# This call to setup() does all the work setup( name="adbnx_adapter", author="ArangoDB", author_email="rajiv@arangodb.com", - version="1.0.0", + version="1.0.1", description="Convert ArangoDB graphs to NetworkX & vice-versa.", long_description=long_description, long_description_content_type="text/markdown", @@ -21,7 +16,7 @@ include_package_data=True, python_requires=">=3.6", license="Apache Software License", - install_requires=["python-arango", "networkx", "overrides"], + install_requires=["python-arango==7.2.0", "networkx>=2.5.1,<=2.6.3"], tests_require=["pytest", "pytest-cov"], classifiers=[ "Intended Audience :: Developers", diff --git a/examples/ArangoDB_NetworkxAdapter.ipynb b/examples/ArangoDB_NetworkxAdapter.ipynb index 35806984..f2587c51 100644 --- a/examples/ArangoDB_NetworkxAdapter.ipynb +++ b/examples/ArangoDB_NetworkxAdapter.ipynb @@ -24,8 +24,8 @@ "id": "Nx9aa3LAeG89" }, "source": [ - "![networkX](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/networkx_logo.svg?raw=1) \n", - "![arangodb](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/ArangoDB_logo.png?raw=1) " + 
"![arangodb](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/ArangoDB_logo.png?raw=1) \n", + "![networkX](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/networkx_logo.svg?raw=1) " ] }, { @@ -34,9 +34,9 @@ "id": "bpvZS-1aeG89" }, "source": [ - "Version: 1.0.0\n", + "Version: 1.0.1\n", "\n", - "In this Notebook we learn how to export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database into [NetworkX](https://networkx.github.io/), the swiss army knife for graph analysis ion python." + "In this Notebook we learn how to export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database into [NetworkX](https://networkx.github.io/), the swiss army knife for graph analysis in python, and vice-versa." ] }, { @@ -58,10 +58,10 @@ "source": [ "%%capture\n", "!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n", - "!git clone -b 1.0.0 --single-branch https://github.com/arangoml/networkx-adapter.git\n", + "!git clone -b 1.0.1 --single-branch https://github.com/arangoml/networkx-adapter.git\n", "!rsync -av networkx-adapter/examples/ ./ --exclude=.git\n", "!rsync -av interactive_tutorials/ ./ --exclude=.git\n", - "!pip3 install adbnx_adapter==1.0.0\n", + "!pip3 install adbnx_adapter==1.0.1\n", "!pip3 install matplotlib\n", "!pip3 install pyArango" ]