Skip to content

Commit

Permalink
Release 1.0.1 Preparation (#26)
Browse files Browse the repository at this point in the history
* initial commit

* Update setup.py

* bump

* new: docstrings

* cleanup & update README

* cleanup docstrings

* Update ArangoDB_NetworkxAdapter.ipynb

* cleanup
  • Loading branch information
aMahanna authored Nov 17, 2021
1 parent a93fec4 commit 94ebc75
Show file tree
Hide file tree
Showing 6 changed files with 273 additions and 74 deletions.
46 changes: 39 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,9 @@
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Downloads per month](https://img.shields.io/pypi/dm/adbnx-adapter)](https://pypi.org/project/adbnx-adapter/)

<center>
<span>
<img src="examples/assets/logos/ArangoDB_logo.png" width=45% >
<img src="examples/assets/logos/networkx_logo.svg" width=45% >
</span>
</center>
<br></br>
![](https://raw.githubusercontent.com/arangoml/networkx-adapter/1.0.0/examples/assets/logos/ArangoDB_logo.png)

![](https://raw.githubusercontent.com/arangoml/networkx-adapter/1.0.0/examples/assets/logos/networkx_logo.svg)

The ArangoDB-Networkx Adapter exports Graphs from ArangoDB, a multi-model Graph Database, into NetworkX, the Swiss Army knife for graph analysis with Python, and vice versa.

Expand All @@ -31,6 +26,43 @@ Networkx is a commonly used tool for analysis of network-data. If your analytics

Get Started on Colab: <a href="https://colab.research.google.com/github/arangoml/networkx-adapter/blob/master/examples/ArangoDB_NetworkxAdapter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


```py
import networkx as nx
from adbnx_adapter.adbnx_adapter import ArangoDB_Networkx_Adapter

# Connection details for the ArangoDB instance the adapter talks to.
con = {
    "hostname": "localhost",
    "protocol": "http",
    "port": 8529,
    "username": "root",
    "password": "rootpassword",
    "dbName": "_system",
}

adbnx_adapter = ArangoDB_Networkx_Adapter(con)

# (Assume ArangoDB fraud-detection data dump is imported)

# ArangoDB -> NetworkX: import an existing ArangoDB graph by name.
fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph("fraud-detection")

# ArangoDB -> NetworkX: import an explicit set of vertex & edge collections.
fraud_vertex_collections = {"account", "bank", "branch", "Class", "customer"}
fraud_edge_collections = {"accountHolder", "Relationship", "transaction"}
fraud_nx_g_2 = adbnx_adapter.create_networkx_graph_from_arangodb_collections(
    "fraud-detection",
    fraud_vertex_collections,
    fraud_edge_collections,
)

# NetworkX -> ArangoDB: store a 5x5 grid graph using one edge definition.
grid_nx_g = nx.grid_2d_graph(5, 5)
grid_edge_definitions = [
    {
        "edge_collection": "to",
        "from_vertex_collections": ["Grid_Node"],
        "to_vertex_collections": ["Grid_Node"],
    }
]
adb_g = adbnx_adapter.create_arangodb_graph("Grid", grid_nx_g, grid_edge_definitions)
```

## Development & Testing

Prerequisite: `arangorestore` must be installed
Expand Down
171 changes: 145 additions & 26 deletions adbnx_adapter/adbnx_adapter/adbnx_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@
from arango import ArangoClient
from networkx.classes.graph import Graph as NetworkXGraph

try: # Python +3.8
from typing import final
except ImportError: # Python 3.6, 3.7
from overrides import final


class ArangoDB_Networkx_Adapter(ADBNX_Adapter):
@final
"""ArangoDB-NetworkX adapter.
:param conn: Connection details to an ArangoDB instance.
:type conn: dict
:param controller_class: The ArangoDB-NetworkX controller, used to identify, keyify and prepare nodes & edges before insertion, optionally re-defined by the user if needed (otherwise defaults to Base_ADBNX_Controller).
:type controller_class: Base_ADBNX_Controller
:raise ValueError: If missing required keys in conn
"""

def __init__(
self,
conn: dict,
Expand All @@ -46,10 +49,38 @@ def __init__(

self.cntrl: Base_ADBNX_Controller = controller_class()

@final
def create_networkx_graph(
self, name: str, graph_attributes, is_keep=True, **query_options
self, name: str, graph_attributes: dict, is_keep=True, **query_options
):
"""Create a NetworkX graph from graph attributes.
:param name: The NetworkX graph name.
:type name: str
:param graph_attributes: An object defining vertex & edge collections to import to NetworkX, along with their associated attributes to keep.
:type graph_attributes: dict
:param is_keep: Only keep the document attributes specified in **graph_attributes** when importing to NetworkX (is True by default). Otherwise, all document attributes are included.
:type is_keep: bool
:param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
:type query_options: **kwargs
:return: A Multi-Directed NetworkX Graph.
:rtype: networkx.classes.multidigraph.MultiDiGraph
:raise ValueError: If missing required keys in graph_attributes
Here is an example entry for parameter **graph_attributes**:
.. code-block:: python
{
"vertexCollections": {
"account": {"Balance", "account_type", "customer_id", "rank"},
"bank": {"Country", "Id", "bank_id", "bank_name"},
"customer": {"Name", "Sex", "Ssn", "rank"},
},
"edgeCollections": {
"accountHolder": {"_from", "_to"},
"transaction": {"_from", "_to"},
},
}
"""
self.__validate_attributes("graph", set(graph_attributes), self.GRAPH_ATRIBS)

self.cntrl.nx_graph = nx.MultiDiGraph(name=name)
Expand All @@ -65,14 +96,26 @@ def create_networkx_graph(
print(f"NetworkX: {name} created")
return self.cntrl.nx_graph

@final
def create_networkx_graph_from_arangodb_collections(
self,
name: str,
vertex_collections: set,
edge_collections: set,
**query_options,
):
"""Create a NetworkX graph from ArangoDB collections.
:param name: The NetworkX graph name.
:type name: str
:param vertex_collections: A set of ArangoDB vertex collections to import to NetworkX.
:type vertex_collections: set
:param edge_collections: A set of ArangoDB edge collections to import to NetworkX.
:type edge_collections: set
:param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
:type query_options: **kwargs
:return: A Multi-Directed NetworkX Graph.
:rtype: networkx.classes.multidigraph.MultiDiGraph
"""
graph_attributes = {
"vertexCollections": {col: {} for col in vertex_collections},
"edgeCollections": {col: {} for col in edge_collections},
Expand All @@ -82,8 +125,17 @@ def create_networkx_graph_from_arangodb_collections(
name, graph_attributes, is_keep=False, **query_options
)

@final
def create_networkx_graph_from_arangodb_graph(self, name: str, **query_options):
"""Create a NetworkX graph from an ArangoDB graph.
:param name: The ArangoDB graph name. Its vertex collections and edge definitions are read from the ArangoDB graph itself.
:type name: str
:param query_options: Keyword arguments to specify AQL query options when fetching documents from the ArangoDB instance.
:type query_options: **kwargs
:return: A Multi-Directed NetworkX Graph.
:rtype: networkx.classes.multidigraph.MultiDiGraph
"""
arango_graph = self.db.graph(name)
v_cols = arango_graph.vertex_collections()
e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()}
Expand All @@ -92,7 +144,6 @@ def create_networkx_graph_from_arangodb_graph(self, name: str, **query_options):
name, v_cols, e_cols, **query_options
)

@final
def create_arangodb_graph(
self,
name: str,
Expand All @@ -101,20 +152,29 @@ def create_arangodb_graph(
overwrite: bool = False,
keyify_edges: bool = False,
):
"""
"""Create an ArangoDB graph from a NetworkX graph, and a set of edge definitions.
:param name: The ArangoDB graph name.
:type name: str
:param original_nx_graph: The existing NetworkX graph.
:type original_nx_graph: networkx.classes.graph.Graph
:param edge_definitions: List of edge definitions, where each edge definition entry is a dictionary with fields "edge_collection", "from_vertex_collections" and "to_vertex_collections" (see below for example).
:type edge_definitions: list[dict]
:param overwrite: If set to True, overwrites existing ArangoDB collections with the NetworkX graph data. Otherwise, will not remove existing data from collections specified in **edge_definitions**.
:type overwrite: bool
:param keyify_edges: If set to True, will create custom edge IDs based on the behavior of the ADBNX_Controller's _keyify_nx_edge() method. Otherwise, edge IDs will be randomly generated.
:type keyify_edges: bool
:return: The ArangoDB Graph API wrapper.
:rtype: arango.graph.Graph
Here is an example entry for parameter **edge_definitions**:
.. code-block:: python
[
{
'edge_collection': 'teaches',
'from_vertex_collections': ['person'],
'to_vertex_collections': ['lecture']
},
{
'edge_collection': 'attends',
'from_vertex_collections': ['person'],
'to_vertex_collections': ['lecture']
'edge_collection': 'teach',
'from_vertex_collections': ['teachers'],
'to_vertex_collections': ['lectures']
}
]
"""
Expand Down Expand Up @@ -165,16 +225,37 @@ def create_arangodb_graph(
print(f"ArangoDB: {name} created")
return self.cntrl.adb_graph

@final
def __validate_attributes(self, type: str, attributes: set, valid_attributes: set):
    """Check that every required attribute is present in the provided set.

    :param type: The context of the validation (e.g. connection attributes, graph attributes); it is only used to build the error message.
    :type type: str
    :param attributes: The attributes that were provided, possibly incomplete.
    :type attributes: set
    :param valid_attributes: The attributes that must all be present.
    :type valid_attributes: set
    :raise ValueError: If **valid_attributes** is not a subset of **attributes**
    """
    # Happy path first: nothing is missing, so there is nothing to report.
    if valid_attributes.issubset(attributes):
        return

    absent = valid_attributes - attributes
    raise ValueError(f"Missing {type} attributes: {absent}")

@final
def __fetch_arangodb_docs(
self, col: str, attributes: set, is_keep: bool, query_options: dict
):
"""Fetches ArangoDB documents within a collection.
:param col: The ArangoDB collection.
:type col: str
:param attributes: The set of document attributes.
:type attributes: set
:param is_keep: Only keep the document attributes specified in **attributes** when returning the document. Otherwise, all document attributes are included.
:type is_keep: bool
:param query_options: AQL query options, forwarded as keyword arguments when executing the query against the ArangoDB instance.
:type query_options: dict
:return: Result cursor.
:rtype: arango.cursor.Cursor
"""
aql = f"""
FOR doc IN {col}
RETURN {is_keep} ?
Expand All @@ -183,30 +264,68 @@ def __fetch_arangodb_docs(

return self.db.aql.execute(aql, **query_options)

@final
def __insert_networkx_node(self, adb_id: str, node: dict, col: str):
    """Add an ArangoDB vertex to the NetworkX graph and remember its ID mapping.

    :param adb_id: The ArangoDB ID of the vertex; used as the key of the controller's ``nx_map``.
    :type adb_id: str
    :param node: The vertex document; its items become the NetworkX node attributes.
    :type node: dict
    :param col: The ArangoDB collection the vertex came from.
    :type col: str
    """
    controller = self.cntrl

    # The controller decides which NetworkX ID this vertex gets.
    node_key = controller._prepare_adb_vertex(node, col)

    # Record ArangoDB ID -> NetworkX ID so edges can later resolve their endpoints.
    controller.nx_map[adb_id] = {"_id": node_key, "collection": col}

    controller.nx_graph.add_node(node_key, **node)

@final
def __insert_networkx_edge(self, edge: dict, col: str):
    """Add an ArangoDB edge to the NetworkX graph.

    The edge's ``_from`` and ``_to`` ArangoDB IDs are translated to NetworkX
    node IDs through the controller's ``nx_map``, so both endpoint vertices
    must have been inserted before the edge.

    :param edge: The edge document; its items become the NetworkX edge attributes.
    :type edge: dict
    :param col: The ArangoDB collection the edge came from.
    :type col: str
    :raise ValueError: If the ``_from`` or ``_to`` vertex of the edge has no entry in ``nx_map``.
    """
    nx_map = self.cntrl.nx_map

    # Resolve both endpoints BEFORE handing the edge to the controller, as the
    # original code did (presumably because _prepare_adb_edge may modify the
    # edge document - confirm against the controller implementation).
    endpoint_ids = []
    for field in ("_from", "_to"):
        adb_id = edge[field]
        entry = nx_map.get(adb_id)
        if entry is None:
            # Previously this surfaced as an opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise ValueError(
                f"Edge in collection '{col}' references {field} vertex "
                f"'{adb_id}', which has not been imported into the NetworkX graph"
            )
        endpoint_ids.append(entry["_id"])
    from_node_id, to_node_id = endpoint_ids

    self.cntrl._prepare_adb_edge(edge, col)
    self.cntrl.nx_graph.add_edge(from_node_id, to_node_id, **edge)

@final
def __insert_arangodb_vertex(self, id, v: dict, col: str, key: str, ow: bool):
    """Store a NetworkX node as a document in an ArangoDB collection.

    :param id: The NetworkX ID of the vertex; used as the key of the controller's ``adb_map``.
    :type id: Any
    :param v: The vertex document to insert; it must already contain ``_id``.
    :type v: dict
    :param col: The ArangoDB collection the vertex belongs to.
    :type col: str
    :param key: The _key value of the vertex.
    :type key: str
    :param ow: Passed as ``overwrite`` to the collection insert.
    :type ow: bool
    """
    # Record NetworkX ID -> ArangoDB identity first, so edges can later
    # resolve their endpoints through adb_map.
    mapping_entry = {"_id": v["_id"], "collection": col, "key": key}
    self.cntrl.adb_map[id] = mapping_entry

    target_collection = self.db.collection(col)
    target_collection.insert(v, overwrite=ow, silent=True)

@final
def __insert_arangodb_edge(
    self, edge: dict, from_node: dict, to_node: dict, col: str, ow: bool
):
    """Store a NetworkX edge as a document in an ArangoDB edge collection.

    The edge's ``_from`` and ``_to`` fields are set to the ArangoDB ``_id``
    values recorded in the controller's ``adb_map`` for the two endpoint
    nodes, so both vertices must have been inserted before the edge.

    :param edge: The edge document to insert; ``_from`` and ``_to`` are set on it in place.
    :type edge: dict
    :param from_node: The NetworkX node object representing the edge source; its ``"id"`` entry is looked up in ``adb_map``.
    :type from_node: dict
    :param to_node: The NetworkX node object representing the edge destination; its ``"id"`` entry is looked up in ``adb_map``.
    :type to_node: dict
    :param col: The ArangoDB collection the edge belongs to.
    :type col: str
    :param ow: Passed as ``overwrite`` to the collection insert.
    :type ow: bool
    :raise ValueError: If the source or destination node has no entry in ``adb_map``.
    """
    adb_map = self.cntrl.adb_map

    for field, endpoint in (("_from", from_node), ("_to", to_node)):
        nx_id = endpoint["id"]
        entry = adb_map.get(nx_id)
        if entry is None:
            # Previously this surfaced as an opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise ValueError(
                f"Edge for collection '{col}' references {field} node "
                f"'{nx_id}', which has not been inserted into ArangoDB"
            )
        edge[field] = entry["_id"]

    self.db.collection(col).insert(edge, overwrite=ow, silent=True)
Loading

0 comments on commit 94ebc75

Please sign in to comment.