diff --git a/README.md b/README.md
index 538c33cd..6907ec4b 100644
--- a/README.md
+++ b/README.md
@@ -9,14 +9,9 @@
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Downloads per month](https://img.shields.io/pypi/dm/adbnx-adapter)](https://pypi.org/project/adbnx-adapter/)
-
-
-
-
-
-
-
+![](https://raw.githubusercontent.com/arangoml/networkx-adapter/1.0.0/examples/assets/logos/ArangoDB_logo.png)
+![](https://raw.githubusercontent.com/arangoml/networkx-adapter/1.0.0/examples/assets/logos/networkx_logo.svg)
The ArangoDB-Networkx Adapter exports Graphs from ArangoDB, a multi-model Graph Database, into NetworkX, the swiss army knife for graph analysis with python, and vice-versa.
@@ -31,6 +26,43 @@ Networkx is a commonly used tool for analysis of network-data. If your analytics
Get Started on Colab:
+
+```py
+import networkx as nx
+from adbnx_adapter.adbnx_adapter import ArangoDB_Networkx_Adapter
+
+con = {
+ "hostname": "localhost",
+ "protocol": "http",
+ "port": 8529,
+ "username": "root",
+ "password": "rootpassword",
+ "dbName": "_system",
+}
+
+adbnx_adapter = ArangoDB_Networkx_Adapter(con)
+
+# (Assume ArangoDB fraud-detection data dump is imported)
+
+fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph("fraud-detection")
+fraud_nx_g_2 = adbnx_adapter.create_networkx_graph_from_arangodb_collections(
+ "fraud-detection",
+ {"account", "bank", "branch", "Class", "customer"},
+ {"accountHolder", "Relationship", "transaction"}
+)
+
+
+grid_nx_g = nx.grid_2d_graph(5, 5)
+grid_edge_definitions = [
+ {
+ "edge_collection": "to",
+ "from_vertex_collections": ["Grid_Node"],
+ "to_vertex_collections": ["Grid_Node"],
+ }
+]
+adb_g = adbnx_adapter.create_arangodb_graph("Grid", grid_nx_g, grid_edge_definitions)
+```
+
## Development & Testing
Prerequisite: `arangorestore` must be installed
diff --git a/adbnx_adapter/adbnx_adapter/adbnx_adapter.py b/adbnx_adapter/adbnx_adapter/adbnx_adapter.py
index 163a4c27..a6b2507f 100644
--- a/adbnx_adapter/adbnx_adapter/adbnx_adapter.py
+++ b/adbnx_adapter/adbnx_adapter/adbnx_adapter.py
@@ -15,14 +15,17 @@
from arango import ArangoClient
from networkx.classes.graph import Graph as NetworkXGraph
-try: # Python +3.8
- from typing import final
-except ImportError: # Python 3.6, 3.7
- from overrides import final
-
class ArangoDB_Networkx_Adapter(ADBNX_Adapter):
- @final
+ """ArangoDB-NetworkX adapter.
+
+ :param conn: Connection details to an ArangoDB instance.
+ :type conn: dict
+ :param controller_class: The ArangoDB-NetworkX controller, used to identify, keyify and prepare nodes & edges before insertion, optionally re-defined by the user if needed (otherwise defaults to Base_ADBNX_Controller).
+ :type controller_class: Base_ADBNX_Controller
+ :raise ValueError: If missing required keys in conn
+ """
+
def __init__(
self,
conn: dict,
@@ -46,10 +49,38 @@ def __init__(
self.cntrl: Base_ADBNX_Controller = controller_class()
- @final
def create_networkx_graph(
- self, name: str, graph_attributes, is_keep=True, **query_options
+ self, name: str, graph_attributes: dict, is_keep=True, **query_options
):
+ """Create a NetworkX graph from graph attributes.
+
+ :param name: The NetworkX graph name.
+ :type name: str
+ :param graph_attributes: An object defining vertex & edge collections to import to NetworkX, along with their associated attributes to keep.
+ :type graph_attributes: dict
+ :param is_keep: Only keep the document attributes specified in **graph_attributes** when importing to NetworkX (is True by default). Otherwise, all document attributes are included.
+ :type is_keep: bool
+ :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
+ :type query_options: **kwargs
+ :return: A Multi-Directed NetworkX Graph.
+ :rtype: networkx.classes.multidigraph.MultiDiGraph
+ :raise ValueError: If missing required keys in graph_attributes
+
+ Here is an example entry for parameter **graph_attributes**:
+
+ .. code-block:: python
+ {
+ "vertexCollections": {
+ "account": {"Balance", "account_type", "customer_id", "rank"},
+ "bank": {"Country", "Id", "bank_id", "bank_name"},
+ "customer": {"Name", "Sex", "Ssn", "rank"},
+ },
+ "edgeCollections": {
+ "accountHolder": {"_from", "_to"},
+ "transaction": {"_from", "_to"},
+ },
+ }
+ """
self.__validate_attributes("graph", set(graph_attributes), self.GRAPH_ATRIBS)
self.cntrl.nx_graph = nx.MultiDiGraph(name=name)
@@ -65,7 +96,6 @@ def create_networkx_graph(
print(f"NetworkX: {name} created")
return self.cntrl.nx_graph
- @final
def create_networkx_graph_from_arangodb_collections(
self,
name: str,
@@ -73,6 +103,19 @@ def create_networkx_graph_from_arangodb_collections(
edge_collections: set,
**query_options,
):
+ """Create a NetworkX graph from ArangoDB collections.
+
+ :param name: The NetworkX graph name.
+ :type name: str
+ :param vertex_collections: A set of ArangoDB vertex collections to import to NetworkX.
+ :type vertex_collections: set
+ :param edge_collections: A set of ArangoDB edge collections to import to NetworkX.
+ :type edge_collections: set
+ :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
+ :type query_options: **kwargs
+ :return: A Multi-Directed NetworkX Graph.
+ :rtype: networkx.classes.multidigraph.MultiDiGraph
+ """
graph_attributes = {
"vertexCollections": {col: {} for col in vertex_collections},
"edgeCollections": {col: {} for col in edge_collections},
@@ -82,8 +125,17 @@ def create_networkx_graph_from_arangodb_collections(
name, graph_attributes, is_keep=False, **query_options
)
- @final
def create_networkx_graph_from_arangodb_graph(self, name: str, **query_options):
+ """Create a NetworkX graph from an ArangoDB graph.
+
+ :param name: The ArangoDB graph name.
+ :type name: str
+ :param vertex_collections: A set of ArangoDB vertex collections to import to NetworkX.
+ :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
+ :type query_options: **kwargs
+ :return: A Multi-Directed NetworkX Graph.
+ :rtype: networkx.classes.multidigraph.MultiDiGraph
+ """
arango_graph = self.db.graph(name)
v_cols = arango_graph.vertex_collections()
e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()}
@@ -92,7 +144,6 @@ def create_networkx_graph_from_arangodb_graph(self, name: str, **query_options):
name, v_cols, e_cols, **query_options
)
- @final
def create_arangodb_graph(
self,
name: str,
@@ -101,20 +152,29 @@ def create_arangodb_graph(
overwrite: bool = False,
keyify_edges: bool = False,
):
- """
+ """Create an ArangoDB graph from a NetworkX graph, and a set of edge definitions.
+
+ :param name: The ArangoDB graph name.
+ :type name: str
+ :param original_nx_graph: The existing NetworkX graph.
+ :type original_nx_graph: networkx.classes.graph.Graph
+ :param edge_definitions: List of edge definitions, where each edge definition entry is a dictionary with fields "edge_collection", "from_vertex_collections" and "to_vertex_collections" (see below for example).
+ :type edge_definitions: list[dict]
+ :param overwrite: If set to True, overwrites existing ArangoDB collections with the NetworkX graph data. Otherwise, will not remove existing data from collections specified in **edge_definitions**.
+ :type overwrite: bool
+ :param keyify_edges: If set to True, will create custom edge IDs based on the behavior of the ADBNX_Controller's _keyify_nx_edge() method. Otherwise, edge IDs will be randomly generated.
+ :type keyify_edges: bool
+ :return: The ArangoDB Graph API wrapper.
+ :rtype: arango.graph.Graph
+
Here is an example entry for parameter **edge_definitions**:
.. code-block:: python
[
{
- 'edge_collection': 'teaches',
- 'from_vertex_collections': ['person'],
- 'to_vertex_collections': ['lecture']
- },
- {
- 'edge_collection': 'attends',
- 'from_vertex_collections': ['person'],
- 'to_vertex_collections': ['lecture']
+ 'edge_collection': 'teach',
+ 'from_vertex_collections': ['teachers'],
+ 'to_vertex_collections': ['lectures']
}
]
"""
@@ -165,16 +225,37 @@ def create_arangodb_graph(
print(f"ArangoDB: {name} created")
return self.cntrl.adb_graph
- @final
def __validate_attributes(self, type: str, attributes: set, valid_attributes: set):
+ """Validates that a set of attributes includes the required valid attributes.
+
+ :param type: The context of the attribute validation (e.g. connection attributes, graph attributes, etc).
+ :type type: str
+ :param attributes: The provided attributes, possibly invalid.
+ :type attributes: set
+ :param valid_attributes: The valid attributes.
+ :type valid_attributes: set
+ :raise ValueError: If **valid_attributes** is not a subset of **attributes**
+ """
if valid_attributes.issubset(attributes) is False:
missing_attributes = valid_attributes - attributes
raise ValueError(f"Missing {type} attributes: {missing_attributes}")
- @final
def __fetch_arangodb_docs(
self, col: str, attributes: set, is_keep: bool, query_options: dict
):
+ """Fetches ArangoDB documents within a collection.
+
+ :param col: The ArangoDB collection.
+ :type col: str
+ :param attributes: The set of document attributes.
+ :type attributes: set
+ :param is_keep: Only keep the document attributes specified in **attributes** when returning the document. Otherwise, all document attributes are included.
+ :type is_keep: bool
+ :param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
+ :type query_options: **kwargs
+ :return: Result cursor.
+ :rtype: arango.cursor.Cursor
+ """
aql = f"""
FOR doc IN {col}
RETURN {is_keep} ?
@@ -183,30 +264,68 @@ def __fetch_arangodb_docs(
return self.db.aql.execute(aql, **query_options)
- @final
def __insert_networkx_node(self, adb_id: str, node: dict, col: str):
+ """Insert a NetworkX node into the NetworkX graph.
+
+ :param adb_id: The ArangoDB ID of the node.
+ :type adb_id: str
+ :param node: The node object to insert.
+ :type node: dict
+ :param col: The ArangoDB collection it came from.
+ :type col: str
+ """
nx_id = self.cntrl._prepare_adb_vertex(node, col)
self.cntrl.nx_map[adb_id] = {"_id": nx_id, "collection": col}
self.cntrl.nx_graph.add_node(nx_id, **node)
- @final
def __insert_networkx_edge(self, edge: dict, col: str):
+ """Insert a NetworkX edge into the NetworkX graph.
+
+ :param edge: The edge object to insert.
+ :type edge: dict
+ :param col: The ArangoDB collection it came from.
+ :type col: str
+ """
from_node_id = self.cntrl.nx_map.get(edge["_from"])["_id"]
to_node_id = self.cntrl.nx_map.get(edge["_to"])["_id"]
self.cntrl._prepare_adb_edge(edge, col)
self.cntrl.nx_graph.add_edge(from_node_id, to_node_id, **edge)
- @final
def __insert_arangodb_vertex(self, id, v: dict, col: str, key: str, ow: bool):
+ """Insert an ArangoDB vertex into an ArangoDB collection.
+
+ :param id: The NetworkX ID of the vertex.
+ :type id: Any
+ :param v: The vertex object to insert.
+ :type v: dict
+ :param col: The ArangoDB collection the vertex belongs to.
+ :type col: str
+ :param key: The _key value of the vertex.
+ :type key: str
+ :param ow: If set to True, overwrite existing document with matching _id.
+ :type ow: bool
+ """
self.cntrl.adb_map[id] = {"_id": v["_id"], "collection": col, "key": key}
self.db.collection(col).insert(v, overwrite=ow, silent=True)
- @final
def __insert_arangodb_edge(
self, edge: dict, from_node: dict, to_node: dict, col: str, ow: bool
):
+ """Insert an ArangoDB edge into an ArangoDB collection.
+
+ :param edge: The edge object to insert.
+ :type edge: dict
+ :param from_node: The NetworkX node object representing the edge source.
+ :type from_node: dict
+ :param to_node: The NetworkX node object representing the edge destination.
+ :type to_node: dict
+ :param col: The ArangoDB collection the edge belongs to.
+ :type col: str
+ :param ow: If set to True, overwrite existing document with matching _id.
+ :type ow: bool
+ """
edge["_from"] = self.cntrl.adb_map.get(from_node["id"])["_id"]
edge["_to"] = self.cntrl.adb_map.get(to_node["id"])["_id"]
self.db.collection(col).insert(edge, overwrite=ow, silent=True)
diff --git a/adbnx_adapter/adbnx_adapter/adbnx_controller.py b/adbnx_adapter/adbnx_adapter/adbnx_controller.py
index 88c9f6ab..2f222faf 100644
--- a/adbnx_adapter/adbnx_adapter/adbnx_controller.py
+++ b/adbnx_adapter/adbnx_adapter/adbnx_controller.py
@@ -3,45 +3,67 @@
from arango.graph import Graph as ArangoDBGraph
from networkx.classes.graph import Graph as NetworkXGraph
-try: # Python +3.8
- from typing import final
-except ImportError: # Python 3.6, 3.7
- from overrides import final
-
class Base_ADBNX_Controller(ADBNX_Controller):
+ """ArangoDB-NetworkX controller.
+
+ Responsible for controlling how nodes & edges are handled when
+ transitioning from ArangoDB to NetworkX, and vice-versa.
+ """
+
def __init__(self):
self.nx_graph: NetworkXGraph = None
self.nx_map = dict() # Maps ArangoDB vertex IDs to NetworkX node IDs
self.adb_graph: ArangoDBGraph = None
-
self.adb_map = dict() # Maps NetworkX node IDs to ArangoDB vertex IDs
def _prepare_adb_vertex(self, vertex: dict, collection: str):
- """
+ """Prepare an ArangoDB vertex before it gets inserted into the NetworkX graph.
+
Given an ArangoDB vertex, you can modify it before it gets inserted
into the NetworkX graph, and/or derive a custom node id for networkx to use.
In most cases, it is only required to return the ArangoDB _id of the vertex.
+
+ :param vertex: The ArangoDB vertex object to (optionally) modify.
+ :type vertex: dict
+ :param collection: The ArangoDB collection the vertex belongs to.
+ :type collection: str
+ :return: The ArangoDB _id attribute of the vertex.
+ :rtype: str
"""
return vertex["_id"]
def _prepare_adb_edge(self, edge: dict, collection: str):
- """
+ """Prepare an ArangoDB edge before it gets inserted into the NetworkX graph.
+
Given an ArangoDB edge, you can modify it before it gets inserted
into the NetworkX graph.
In most cases, no action is needed.
+
+ :param edge: The ArangoDB edge object to (optionally) modify.
+ :type edge: dict
+ :param collection: The ArangoDB collection the edge belongs to.
+ :type collection: str
"""
pass
def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str:
- """
- Given a NetworkX node, identify what ArangoDB collection should it belong to.
+ """Given a NetworkX node, identify what ArangoDB collection it should belong to.
NOTE: If your NetworkX graph does not comply to ArangoDB standards
(i.e a node's ID is not "collection/key"), then you must override this function.
+
+ :param id: The NetworkX ID of the node.
+ :type id: Any
+ :param node: The NetworkX node object.
+ :type node: dict
+ :param overwrite: Whether overwrite is enabled or not.
+ :type overwrite: bool
+ :return: The ArangoDB collection name
+ :rtype: str
"""
# In this case, id is already a valid ArangoDB _id
adb_id: str = id
@@ -50,23 +72,40 @@ def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str:
def _identify_nx_edge(
self, edge: dict, from_node: dict, to_node: dict, overwrite: bool
) -> str:
- """
- Given a NetworkX edge, its pair of nodes, and the overwrite boolean,
- identify what ArangoDB collection should it belong to.
+ """Given a NetworkX edge, its pair of nodes, and the overwrite boolean, identify what ArangoDB collection it should belong to.
NOTE: If your NetworkX graph does not comply to ArangoDB standards
(i.e a node's ID is not "collection/key"), then you must override this function.
+
+ :param edge: The NetworkX edge object.
+ :type edge: dict
+ :param from_node: The NetworkX node object representing the edge source.
+ :type from_node: dict
+ :param to_node: The NetworkX node object representing the edge destination.
+ :type to_node: dict
+ :param overwrite: Whether overwrite is enabled or not.
+ :type overwrite: bool
+ :return: The ArangoDB collection name
+ :rtype: str
"""
# In this case, edge["_id"] is already a valid ArangoDB _id
edge_id: str = edge["_id"]
return edge_id.split("/")[0] + ("" if overwrite else "_nx")
def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str:
- """
- Given a NetworkX node, derive its valid ArangoDB key.
+ """Given a NetworkX node, derive its valid ArangoDB key.
NOTE: If your NetworkX graph does not comply to ArangoDB standards
(i.e a node's ID is not "collection/key"), then you must override this function.
+
+ :param node: The NetworkX node object.
+ :type node: dict
+ :param collection: The ArangoDB collection the node belongs to.
+ :type collection: str
+ :param overwrite: Whether overwrite is enabled or not.
+ :type overwrite: bool
+ :return: A valid ArangoDB _key value.
+ :rtype: str
"""
# In this case, id is already a valid ArangoDB _id
adb_id: str = id
@@ -80,21 +119,36 @@ def _keyify_nx_edge(
collection: str,
overwrite: bool,
):
- """
- Given a NetworkX edge, its collection, its pair of nodes, and the overwrite boolean,
- derive its valid ArangoDB key.
+ """Given a NetworkX edge, its collection, its pair of nodes, and the overwrite boolean,
+ derive its valid ArangoDB key.
NOTE: If your NetworkX graph does not comply to ArangoDB standards
(i.e a node's ID is not "collection/key"), then you must override this function.
+
+ :param edge: The NetworkX edge object.
+ :type edge: dict
+ :param from_node: The NetworkX node object representing the edge source.
+ :type from_node: dict
+ :param to_node: The NetworkX node object representing the edge destination.
+ :type to_node: dict
+ :param collection: The ArangoDB collection the edge belongs to.
+ :type collection: str
+ :param overwrite: Whether overwrite is enabled or not.
+ :type overwrite: bool
+ :return: A valid ArangoDB _key value.
+ :rtype: str
"""
# In this case, edge["_id"] is already a valid ArangoDB _id
edge_id: str = edge["_id"]
return edge_id.split("/")[1]
- @final
def _string_to_arangodb_key_helper(self, string: str) -> str:
- """
- Given a string, derive a valid ArangoDB _key string.
+ """Given a string, derive a valid ArangoDB _key string.
+
+ :param string: A (possibly) invalid _key string value.
+ :type string: str
+ :return: A valid ArangoDB _key value.
+ :rtype: str
"""
res = ""
for s in string:
@@ -103,10 +157,13 @@ def _string_to_arangodb_key_helper(self, string: str) -> str:
return res
- @final
def _tuple_to_arangodb_key_helper(self, tup: tuple) -> str:
- """
- Given a tuple, derive a valid ArangoDB _key string.
+ """Given a tuple, derive a valid ArangoDB _key string.
+
+ :param tup: A tuple with non-None values.
+ :type tup: tuple
+ :return: A valid ArangoDB _key value.
+ :rtype: str
"""
string = "".join(map(str, tup))
return self._string_to_arangodb_key_helper(string)
diff --git a/adbnx_adapter/requirements.txt b/adbnx_adapter/requirements.txt
deleted file mode 100644
index 157a50ee..00000000
--- a/adbnx_adapter/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-networkx==2.6.3
-python-arango==7.2.0
-pytest==6.2.5
-overrides==6.1.0
\ No newline at end of file
diff --git a/adbnx_adapter/setup.py b/adbnx_adapter/setup.py
index 0186c5f0..6d253f9b 100644
--- a/adbnx_adapter/setup.py
+++ b/adbnx_adapter/setup.py
@@ -1,18 +1,13 @@
-import pathlib
from setuptools import setup
-# The directory containing this file
-HERE = pathlib.Path(__file__).resolve().parents[1]
-
with open("../README.md", "r") as f:
long_description = f.read()
-# This call to setup() does all the work
setup(
name="adbnx_adapter",
author="ArangoDB",
author_email="rajiv@arangodb.com",
- version="1.0.0",
+ version="1.0.1",
description="Convert ArangoDB graphs to NetworkX & vice-versa.",
long_description=long_description,
long_description_content_type="text/markdown",
@@ -21,7 +16,7 @@
include_package_data=True,
python_requires=">=3.6",
license="Apache Software License",
- install_requires=["python-arango", "networkx", "overrides"],
+ install_requires=["python-arango==7.2.0", "networkx>=2.5.1,<=2.6.3"],
tests_require=["pytest", "pytest-cov"],
classifiers=[
"Intended Audience :: Developers",
diff --git a/examples/ArangoDB_NetworkxAdapter.ipynb b/examples/ArangoDB_NetworkxAdapter.ipynb
index 35806984..f2587c51 100644
--- a/examples/ArangoDB_NetworkxAdapter.ipynb
+++ b/examples/ArangoDB_NetworkxAdapter.ipynb
@@ -24,8 +24,8 @@
"id": "Nx9aa3LAeG89"
},
"source": [
- "![networkX](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/networkx_logo.svg?raw=1) \n",
- "![arangodb](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/ArangoDB_logo.png?raw=1) "
+ "![arangodb](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/ArangoDB_logo.png?raw=1) \n",
+ "![networkX](https://github.com/arangoml/networkx-adapter/blob/master/examples/assets/logos/networkx_logo.svg?raw=1) "
]
},
{
@@ -34,9 +34,9 @@
"id": "bpvZS-1aeG89"
},
"source": [
- "Version: 1.0.0\n",
+ "Version: 1.0.1\n",
"\n",
- "In this Notebook we learn how to export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database into [NetworkX](https://networkx.github.io/), the swiss army knife for graph analysis ion python."
+ "In this Notebook we learn how to export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database, into [NetworkX](https://networkx.github.io/), the swiss army knife for graph analysis in Python, and vice-versa."
]
},
{
@@ -58,10 +58,10 @@
"source": [
"%%capture\n",
"!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n",
- "!git clone -b 1.0.0 --single-branch https://github.com/arangoml/networkx-adapter.git\n",
+ "!git clone -b 1.0.1 --single-branch https://github.com/arangoml/networkx-adapter.git\n",
"!rsync -av networkx-adapter/examples/ ./ --exclude=.git\n",
"!rsync -av interactive_tutorials/ ./ --exclude=.git\n",
- "!pip3 install adbnx_adapter==1.0.0\n",
+ "!pip3 install adbnx_adapter==1.0.1\n",
"!pip3 install matplotlib\n",
"!pip3 install pyArango"
]