diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..281bfc09 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,32 @@ +name: build +on: [push] +jobs: + build: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./adbnx_adapter + strategy: + matrix: + python: ['3.6', '3.7', '3.8', '3.9', '3.10'] + name: Python ${{ matrix.python }} + env: + COVERALLS_REPO_TOKEN: ${{secrets.COVERALLS_REPO_TOKEN}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + steps: + - uses: actions/checkout@v2 + - name: Setup python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Lint with Black + uses: psf/black@stable + with: + options: "--check --verbose --diff --color" + src: "adbnx_adapter" + - name: Install dependencies + run: pip install -e . pytest pytest-cov coveralls + - name: Run pytest + run: | + pytest --cov=adbnx_adapter --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes + coveralls \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7d34cde6..5a5d7b9d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ adbnx_adapter/dist/ # Remove the build directory from repo adbnx_adapter/build/ adbnx_adapter/*.egg-info - +.vscode \ No newline at end of file diff --git a/README.md b/README.md index 9bf416cf..538c33cd 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,45 @@ # ArangoDB-Networkx Adapter +[![build](https://github.com/arangoml/networkx-adapter/actions/workflows/build.yml/badge.svg)](https://github.com/arangoml/networkx-adapter/actions/workflows/build.yml) +[![Coverage Status](https://coveralls.io/repos/github/arangoml/networkx-adapter/badge.svg)](https://coveralls.io/github/arangoml/networkx-adapter) -
- -
-
- -
- -The ArangoDB-Networkx Adapter export Graphs from ArangoDB, a multi-model Graph Database into NetworkX, the swiss army knife for graph analysis with python. - - -## Quickstart - -To get started quickly you just use this setup free jupyter notebook: Open In Colab - -To get started in custom code: -```bash -pip install adbnx_adapter networkx matplotlib python-arango -``` +[![PyPI version badge](https://img.shields.io/pypi/v/adbnx-adapter)](https://pypi.org/project/adbnx-adapter/) +[![Python versions badge](https://img.shields.io/pypi/pyversions/adbnx-adapter)](https://github.com/arangoml/networkx-adapter) -``` python -import networkx as nx -from adbnx_adapter.arangoDB_networkx_adapter import ArangoDB_Networkx_Adapter +[![License](https://img.shields.io/github/license/arangoml/networkx-adapter)](https://github.com/arangoml/networkx-adapter/blob/master/LICENSE) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Downloads per month](https://img.shields.io/pypi/dm/adbnx-adapter)](https://pypi.org/project/adbnx-adapter/) -# Specify the connection to the ArangoDB Database -con = {'dbName': 'YOURDBNAME', - 'username': 'YOURUSERNAME', - 'password': 'YOURPASSOWRD', - 'hostname': 'instance.arangodb.cloud', - 'port': 8529} - -# Create Adapter instance -ma = ArangoDB_Networkx_Adapter(conn = con) - -# Specify attributes to be imported -attributes = { 'vertexCollections': - {'account': {'Balance', 'account_type', 'customer_id', 'rank'}},\ - 'edgeCollections' : - {'accountHolder': {'_from', '_to'},\ - 'transaction': {'_from', '_to'}}} - -# Export networkX graph -g = ma.create_networkx_graph(graph_name = 'FraudDetection', graph_attributes = attributes) +
+ + + + +
+

-# You can also provide valid Python-Arango AQL query options to the command above, like such: -# g = ma.create_networkx_graph(graph_name = 'FraudDetection', graph_attributes = attributes, ttl=1000, stream=True) -# Use networkX -nx.draw(g, with_labels=True) -``` +The ArangoDB-Networkx Adapter exports Graphs from ArangoDB, a multi-model Graph Database, into NetworkX, the Swiss Army knife for graph analysis with Python, and vice-versa. -# Introduction +## About NetworkX Networkx is a commonly used tool for analysis of network-data. If your analytics use cases require the use of all your graph data, for example, to summarize graph structure, or answer global path traversal queries, then using the ArangoDB Pregel API is recommended. If your analysis pertains to a subgraph, then you may be interested in getting the Networkx representation of the subgraph for one of the following reasons: 1. An algorithm for your use case is available in Networkx. 2. A library that you want to use for your use case works with Networkx Graphs as input. +## Quickstart + +Get Started on Colab: Open In Colab + +## Development & Testing + +Prerequisite: `arangorestore` must be installed -Check the DGL folder for an implementation of a Networkx-Adapter for the Deep Graph Library. +1. `git clone https://github.com/arangoml/networkx-adapter.git` +2. `cd networkx-adapter` +3. `python -m venv .venv` +4. `source .venv/bin/activate` (macOS/Linux) or `.venv/scripts/activate` (Windows) +5. `cd adbnx_adapter` +6. `pip install -e . pytest` +7. `pytest` + * If you encounter `ModuleNotFoundError`, try closing & relaunching your virtual environment by running `deactivate` in your terminal & restarting from Step 4. 
diff --git a/adbnx_adapter/.gitignore b/adbnx_adapter/.gitignore index 09622282..f89f5e06 100644 --- a/adbnx_adapter/.gitignore +++ b/adbnx_adapter/.gitignore @@ -6,3 +6,4 @@ */.ipynb_checkpoints/* #OSX DS_Store **/*.DS_Store +.venv \ No newline at end of file diff --git a/adbnx_adapter/MANIFEST.in b/adbnx_adapter/MANIFEST.in index 8d1c8b69..03d0f335 100644 --- a/adbnx_adapter/MANIFEST.in +++ b/adbnx_adapter/MANIFEST.in @@ -1 +1 @@ - +graft adbnx_adapter \ No newline at end of file diff --git a/adbnx_adapter/__init__.py b/adbnx_adapter/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/adbnx_adapter/adbnx_adapter/abc.py b/adbnx_adapter/adbnx_adapter/abc.py new file mode 100644 index 00000000..ad57cd64 --- /dev/null +++ b/adbnx_adapter/adbnx_adapter/abc.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 26 11:38:31 2020 + +@author: Rajiv Sambasivan +@author: Joerg Schad +@author: Anthony Mahanna +""" + +from abc import ABC + + +class ADBNX_Adapter(ABC): + def __init__(self): + raise NotImplementedError() # pragma: no cover + + def create_networkx_graph(self): + raise NotImplementedError() # pragma: no cover + + def create_networkx_graph_from_arangodb_collections(self): + raise NotImplementedError() # pragma: no cover + + def create_networkx_graph_from_arangodb_graph(self): + raise NotImplementedError() # pragma: no cover + + def create_arangodb_graph(self): + raise NotImplementedError() # pragma: no cover + + def __validate_attributes(self): + raise NotImplementedError() # pragma: no cover + + def __fetch_arangodb_docs(self): + raise NotImplementedError() # pragma: no cover + + def __insert_arangodb_vertex(self): + raise NotImplementedError() # pragma: no cover + + def __insert_arangodb_edge(self): + raise NotImplementedError() # pragma: no cover + + def __insert_networkx_node(self): + raise NotImplementedError() # pragma: no cover + + def __insert_networkx_edge(self): + raise NotImplementedError() 
# pragma: no cover + + @property + def CONNECTION_ATRIBS(self): + return {"hostname", "username", "password", "dbName"} + + @property + def GRAPH_ATRIBS(self): + return {"vertexCollections", "edgeCollections"} + + +class ADBNX_Controller(ABC): + def __init__(self): + raise NotImplementedError() # pragma: no cover + + def _prepare_adb_vertex(self, vertex: dict, collection: str): + raise NotImplementedError() # pragma: no cover + + def _prepare_adb_edge(self, edge: dict, collection: str): + raise NotImplementedError() # pragma: no cover + + def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str: + raise NotImplementedError() # pragma: no cover + + def _identify_nx_edge( + self, edge: dict, from_node: dict, to_node: dict, overwrite: bool + ) -> str: + raise NotImplementedError() # pragma: no cover + + def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str: + raise NotImplementedError() # pragma: no cover + + def _keyify_nx_edge( + self, + edge: dict, + from_node: dict, + to_node: dict, + collection: str, + overwrite: bool, + ): + raise NotImplementedError() # pragma: no cover + + @property + def VALID_KEY_CHARS(self): + return { + "_", + "-", + ":", + ".", + "@", + "(", + ")", + "+", + ",", + "=", + ";", + "$", + "!", + "*", + "'", + "%", + } diff --git a/adbnx_adapter/adbnx_adapter/adbnx_adapter.py b/adbnx_adapter/adbnx_adapter/adbnx_adapter.py new file mode 100644 index 00000000..163a4c27 --- /dev/null +++ b/adbnx_adapter/adbnx_adapter/adbnx_adapter.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 26 09:51:47 2020 + +@author: Rajiv Sambasivan +@author: Joerg Schad +@author: Anthony Mahanna +""" + +from .abc import ADBNX_Adapter +from .adbnx_controller import Base_ADBNX_Controller + +import networkx as nx +from arango import ArangoClient +from networkx.classes.graph import Graph as NetworkXGraph + +try: # Python +3.8 + from typing import final +except ImportError: # Python 3.6, 3.7 
+ from overrides import final + + +class ArangoDB_Networkx_Adapter(ADBNX_Adapter): + @final + def __init__( + self, + conn: dict, + controller_class: Base_ADBNX_Controller = Base_ADBNX_Controller, + ): + self.__validate_attributes("connection", set(conn), self.CONNECTION_ATRIBS) + + username = conn["username"] + password = conn["password"] + db_name = conn["dbName"] + port = str(conn.get("port", 8529)) + protocol = conn.get("protocol", "https") + + url = protocol + "://" + conn["hostname"] + ":" + port + print(f"Connecting to {url}") + self.db = ArangoClient(hosts=url).db(db_name, username, password, verify=True) + + if issubclass(controller_class, Base_ADBNX_Controller) is False: + msg = "controller_class must inherit from Base_ADBNX_Controller" # pragma: no cover + raise TypeError(msg) # pragma: no cover + + self.cntrl: Base_ADBNX_Controller = controller_class() + + @final + def create_networkx_graph( + self, name: str, graph_attributes, is_keep=True, **query_options + ): + self.__validate_attributes("graph", set(graph_attributes), self.GRAPH_ATRIBS) + + self.cntrl.nx_graph = nx.MultiDiGraph(name=name) + + for col, atribs in graph_attributes["vertexCollections"].items(): + for v in self.__fetch_arangodb_docs(col, atribs, is_keep, query_options): + self.__insert_networkx_node(v["_id"], v, col) + + for col, atribs in graph_attributes["edgeCollections"].items(): + for e in self.__fetch_arangodb_docs(col, atribs, is_keep, query_options): + self.__insert_networkx_edge(e, col) + + print(f"NetworkX: {name} created") + return self.cntrl.nx_graph + + @final + def create_networkx_graph_from_arangodb_collections( + self, + name: str, + vertex_collections: set, + edge_collections: set, + **query_options, + ): + graph_attributes = { + "vertexCollections": {col: {} for col in vertex_collections}, + "edgeCollections": {col: {} for col in edge_collections}, + } + + return self.create_networkx_graph( + name, graph_attributes, is_keep=False, **query_options + ) + + @final + def 
create_networkx_graph_from_arangodb_graph(self, name: str, **query_options): + arango_graph = self.db.graph(name) + v_cols = arango_graph.vertex_collections() + e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()} + + return self.create_networkx_graph_from_arangodb_collections( + name, v_cols, e_cols, **query_options + ) + + @final + def create_arangodb_graph( + self, + name: str, + original_nx_graph: NetworkXGraph, + edge_definitions: list, + overwrite: bool = False, + keyify_edges: bool = False, + ): + """ + Here is an example entry for parameter **edge_definitions**: + + .. code-block:: python + [ + { + 'edge_collection': 'teaches', + 'from_vertex_collections': ['person'], + 'to_vertex_collections': ['lecture'] + }, + { + 'edge_collection': 'attends', + 'from_vertex_collections': ['person'], + 'to_vertex_collections': ['lecture'] + } + ] + """ + nx_graph: NetworkXGraph = original_nx_graph.copy() + + for definition in edge_definitions: + e_col = definition["edge_collection"] + if self.db.has_collection(e_col): + self.db.collection(e_col).truncate() if overwrite else None + else: + self.db.create_collection(e_col, edge=True) + + for v_col in ( + definition["from_vertex_collections"] + + definition["to_vertex_collections"] + ): + if self.db.has_collection(v_col): + self.db.collection(v_col).truncate() if overwrite else None + else: + self.db.create_collection(v_col) + + if overwrite: + self.db.delete_graph(name, ignore_missing=True) + + self.cntrl.adb_graph = self.db.create_graph( + name, edge_definitions=edge_definitions + ) + + for node_id, node in nx_graph.nodes(data=True): + col = self.cntrl._identify_nx_node(node_id, node, overwrite) + key = self.cntrl._keyify_nx_node(node_id, node, col, overwrite) + node["_id"] = col + "/" + key + self.__insert_arangodb_vertex(node_id, node, col, key, overwrite) + + for from_node_id, to_node_id, edge in nx_graph.edges(data=True): + from_node = {"id": from_node_id, **nx_graph.nodes[from_node_id]} + 
to_node = {"id": to_node_id, **nx_graph.nodes[to_node_id]} + + col = self.cntrl._identify_nx_edge(edge, from_node, to_node, overwrite) + if keyify_edges: + key = self.cntrl._keyify_nx_edge( + edge, from_node, to_node, col, overwrite + ) + edge["_id"] = col + "/" + key + + self.__insert_arangodb_edge(edge, from_node, to_node, col, overwrite) + + print(f"ArangoDB: {name} created") + return self.cntrl.adb_graph + + @final + def __validate_attributes(self, type: str, attributes: set, valid_attributes: set): + if valid_attributes.issubset(attributes) is False: + missing_attributes = valid_attributes - attributes + raise ValueError(f"Missing {type} attributes: {missing_attributes}") + + @final + def __fetch_arangodb_docs( + self, col: str, attributes: set, is_keep: bool, query_options: dict + ): + aql = f""" + FOR doc IN {col} + RETURN {is_keep} ? + MERGE(KEEP(doc, {list(attributes)}), {{"_id": doc._id}}) : doc + """ + + return self.db.aql.execute(aql, **query_options) + + @final + def __insert_networkx_node(self, adb_id: str, node: dict, col: str): + nx_id = self.cntrl._prepare_adb_vertex(node, col) + self.cntrl.nx_map[adb_id] = {"_id": nx_id, "collection": col} + + self.cntrl.nx_graph.add_node(nx_id, **node) + + @final + def __insert_networkx_edge(self, edge: dict, col: str): + from_node_id = self.cntrl.nx_map.get(edge["_from"])["_id"] + to_node_id = self.cntrl.nx_map.get(edge["_to"])["_id"] + + self.cntrl._prepare_adb_edge(edge, col) + self.cntrl.nx_graph.add_edge(from_node_id, to_node_id, **edge) + + @final + def __insert_arangodb_vertex(self, id, v: dict, col: str, key: str, ow: bool): + self.cntrl.adb_map[id] = {"_id": v["_id"], "collection": col, "key": key} + self.db.collection(col).insert(v, overwrite=ow, silent=True) + + @final + def __insert_arangodb_edge( + self, edge: dict, from_node: dict, to_node: dict, col: str, ow: bool + ): + edge["_from"] = self.cntrl.adb_map.get(from_node["id"])["_id"] + edge["_to"] = self.cntrl.adb_map.get(to_node["id"])["_id"] + 
self.db.collection(col).insert(edge, overwrite=ow, silent=True) diff --git a/adbnx_adapter/adbnx_adapter/adbnx_controller.py b/adbnx_adapter/adbnx_adapter/adbnx_controller.py new file mode 100644 index 00000000..88c9f6ab --- /dev/null +++ b/adbnx_adapter/adbnx_adapter/adbnx_controller.py @@ -0,0 +1,112 @@ +from .abc import ADBNX_Controller + +from arango.graph import Graph as ArangoDBGraph +from networkx.classes.graph import Graph as NetworkXGraph + +try: # Python +3.8 + from typing import final +except ImportError: # Python 3.6, 3.7 + from overrides import final + + +class Base_ADBNX_Controller(ADBNX_Controller): + def __init__(self): + self.nx_graph: NetworkXGraph = None + self.nx_map = dict() # Maps ArangoDB vertex IDs to NetworkX node IDs + + self.adb_graph: ArangoDBGraph = None + + self.adb_map = dict() # Maps NetworkX node IDs to ArangoDB vertex IDs + + def _prepare_adb_vertex(self, vertex: dict, collection: str): + """ + Given an ArangoDB vertex, you can modify it before it gets inserted + into the NetworkX graph, and/or derive a custom node id for networkx to use. + + In most cases, it is only required to return the ArangoDB _id of the vertex. + """ + return vertex["_id"] + + def _prepare_adb_edge(self, edge: dict, collection: str): + """ + Given an ArangoDB edge, you can modify it before it gets inserted + into the NetworkX graph. + + In most cases, no action is needed. + """ + pass + + def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str: + """ + Given a NetworkX node, identify what ArangoDB collection should it belong to. + + NOTE: If your NetworkX graph does not comply to ArangoDB standards + (i.e a node's ID is not "collection/key"), then you must override this function. 
+ """ + # In this case, id is already a valid ArangoDB _id + adb_id: str = id + return adb_id.split("/")[0] + ("" if overwrite else "_nx") + + def _identify_nx_edge( + self, edge: dict, from_node: dict, to_node: dict, overwrite: bool + ) -> str: + """ + Given a NetworkX edge, its pair of nodes, and the overwrite boolean, + identify what ArangoDB collection should it belong to. + + NOTE: If your NetworkX graph does not comply to ArangoDB standards + (i.e a node's ID is not "collection/key"), then you must override this function. + """ + # In this case, edge["_id"] is already a valid ArangoDB _id + edge_id: str = edge["_id"] + return edge_id.split("/")[0] + ("" if overwrite else "_nx") + + def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str: + """ + Given a NetworkX node, derive its valid ArangoDB key. + + NOTE: If your NetworkX graph does not comply to ArangoDB standards + (i.e a node's ID is not "collection/key"), then you must override this function. + """ + # In this case, id is already a valid ArangoDB _id + adb_id: str = id + return adb_id.split("/")[1] + + def _keyify_nx_edge( + self, + edge: dict, + from_node: dict, + to_node: dict, + collection: str, + overwrite: bool, + ): + """ + Given a NetworkX edge, its collection, its pair of nodes, and the overwrite boolean, + derive its valid ArangoDB key. + + NOTE: If your NetworkX graph does not comply to ArangoDB standards + (i.e a node's ID is not "collection/key"), then you must override this function. + """ + # In this case, edge["_id"] is already a valid ArangoDB _id + edge_id: str = edge["_id"] + return edge_id.split("/")[1] + + @final + def _string_to_arangodb_key_helper(self, string: str) -> str: + """ + Given a string, derive a valid ArangoDB _key string. 
+ """ + res = "" + for s in string: + if s.isalnum() or s in self.VALID_KEY_CHARS: + res += s + + return res + + @final + def _tuple_to_arangodb_key_helper(self, tup: tuple) -> str: + """ + Given a tuple, derive a valid ArangoDB _key string. + """ + string = "".join(map(str, tup)) + return self._string_to_arangodb_key_helper(string) diff --git a/adbnx_adapter/adbnx_adapter/arangoDB_networkx_adapter.py b/adbnx_adapter/adbnx_adapter/arangoDB_networkx_adapter.py deleted file mode 100644 index 07c9fc37..00000000 --- a/adbnx_adapter/adbnx_adapter/arangoDB_networkx_adapter.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Mar 26 09:51:47 2020 - -@author: Rajiv Sambasivan -""" - - -from adbnx_adapter.arangodb_networkx_adapter_base import Networkx_Adapter_Base -import networkx as nx -from arango import ArangoClient - - -class ArangoDB_Networkx_Adapter(Networkx_Adapter_Base): - - def __init__(self, conn): - if self.is_valid_conn(conn): - url = conn["hostname"] - user_name = conn["username"] - password = conn["password"] - dbName = conn["dbName"] - if 'port' in conn: - port = str(conn['port']) - else: - port = '8529' - if 'protocol' in conn: - protocol = conn['protocol'] - else: - protocol = "https" - con_str = protocol + "://" + url + ":" + port - client = ArangoClient(hosts=con_str) - self.db = client.db(dbName, user_name, password) - else: - print( - "The connection information you supplied is invalid, please check and try again!") - - return - - def is_valid_conn(self, conn): - valid_con_info = True - - if not "hostname" in conn: - print("hostname is missing in connection") - if not "username" in conn: - print("Username is missing in connection") - valid_con_info = False - if not "password" in conn: - print("Password is missing in connection") - valid_con_info = False - if not "dbName" in conn: - print("Database is missing in connection") - valid_con_info = False - - return valid_con_info - - def 
is_valid_graph_attributes(self, graph_config): - valid_config = True - - if not 'vertexCollections' in graph_config: - print("Graph attributes do not contain vertex collections") - valid_config = False - if not 'edgeCollections' in graph_config: - print("Graph attributes do not contain edge collections") - valid_config = False - - return valid_config - - def create_networkx_graph(self, graph_name, graph_attributes, **query_options): - - if self.is_valid_graph_attributes(graph_attributes): - g = nx.DiGraph() - for k, v in graph_attributes['vertexCollections'].items(): - query = "FOR doc in %s " % (k) - cspl = [s + ':' + 'doc.' + s for s in v] - cspl.append('_id: doc._id') - csps = ','.join(cspl) - query = query + "RETURN { " + csps + "}" - - cursor = self.db.aql.execute(query, **query_options) - for doc in cursor: - g.add_node(doc['_id'], attr_dict=doc) - - for k, v in graph_attributes['edgeCollections'].items(): - query = "FOR doc in %s " % (k) - cspl = [s + ':' + 'doc.' + s for s in v] - cspl.append('_id: doc._id') - csps = ','.join(cspl) - query = query + "RETURN { " + csps + "}" - - cursor = self.db.aql.execute(query, **query_options) - # breakpoint() - for doc in cursor: - g.add_edge(doc['_from'], doc['_to']) - - return g diff --git a/adbnx_adapter/adbnx_adapter/arangodb_networkx_adapter_base.py b/adbnx_adapter/adbnx_adapter/arangodb_networkx_adapter_base.py deleted file mode 100644 index be49fd14..00000000 --- a/adbnx_adapter/adbnx_adapter/arangodb_networkx_adapter_base.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Mar 26 11:38:31 2020 - -@author: Rajiv Sambasivan -""" - -from abc import ABC - - -class Networkx_Adapter_Base(ABC): - - def create_networkx_graph(): - pass diff --git a/adbnx_adapter/adbnx_adapter/dgl_arangoDB_networkx_adapter.py b/adbnx_adapter/adbnx_adapter/dgl_arangoDB_networkx_adapter.py deleted file mode 100644 index a4e83f37..00000000 --- 
a/adbnx_adapter/adbnx_adapter/dgl_arangoDB_networkx_adapter.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Mar 26 09:51:47 2020 - -@author: Rajiv Sambasivan -""" - - -from adbnx_adapter.arangoDB_networkx_adapter import ArangoDB_Networkx_Adapter -import networkx as nx -import torch as th -import numpy as np -import dgl - - -class DGLArangoDB_Networkx_Adapter(ArangoDB_Networkx_Adapter): - - def create_networkx_graph(self, graph_name, graph_attributes, **query_options): - - if self.is_valid_graph_attributes(graph_attributes): - edge_names = [] - redge_names = [] - for k, v in graph_attributes['edgeCollections'].items(): - edge_names.append(k) - ens = k.split('_', 1) - redge = ens[1] + '_' + ens[0] - redge_names.append(redge) - - sgdata = {ename: nx.DiGraph() for ename in edge_names} - rsgdata = {ename: nx.DiGraph() for ename in redge_names} - nxg = nx.DiGraph() - labels = [] - node_data = {} - - print("Loading edge data...") - - for k, v in graph_attributes['edgeCollections'].items(): - query = "FOR doc in %s " % (k) - cspl = [s + ':' + 'doc.' + s for s in v] - cspl.append('_id: doc._id') - csps = ','.join(cspl) - query = query + "RETURN { " + csps + "}" - sgraph = sgdata[k] - ens = k.split('_', 1) - redge = ens[1] + '_' + ens[0] - rsgraph = rsgdata[redge] - cursor = self.db.aql.execute(query, **query_options) - for doc in cursor: - nfrom = doc['_from'] - nto = doc['_to'] - sgraph.add_edge(nfrom, nto) - sgraph.nodes[nfrom]['bipartite'] = 0 - sgraph.nodes[nto]['bipartite'] = 1 - rsgraph.add_edge(nto, nfrom) - rsgraph.nodes[nfrom]['bipartite'] = 1 - rsgraph.nodes[nto]['bipartite'] = 0 - - print("Loading vertex data...") - vnames = [] - for k, v in graph_attributes['vertexCollections'].items(): - vnames.append(k) - node_data[k] = list() - query = "FOR doc in %s " % (k) - cspl = [s + ':' + 'doc.' 
+ s for s in v] - cspl.append('_id: doc._id') - csps = ','.join(cspl) - query = query + "RETURN { " + csps + "}" - - cursor = self.db.aql.execute(query, **query_options) - for doc in cursor: - exclude_attr = ['_id', '_key', 'node_id'] - if k == 'incident': - exclude_attr.append('reassigned') - labels.append(doc['reassigned']) - sdata = {k: v for k, v in doc.items() - if k not in exclude_attr} - ndvalues = np.fromiter(sdata.values(), dtype=int) - #rndata = np.asarray(ndvalues, dtype = int) - #v_data = th.from_numpy(rndata) - node_data[k].append(ndvalues) - - print("Creating DGL Heterograph...") - dict_desc = dict() - for ename in edge_names: - ens = ename.split('_', 1) - redge = ens[1] + '_' + ens[0] - fgk = (ens[0], ename, ens[1]) - dict_desc[fgk] = nxg - rgk = (ens[1], redge, ens[0]) - dict_desc[fgk] = sgdata[ename] - dict_desc[rgk] = rsgdata[redge] - - g = dgl.heterograph(dict_desc) - - for v in vnames: - rndata = np.asarray(node_data[v], dtype=int) - v_data = th.from_numpy(rndata) - g.nodes[v].data['f'] = v_data - - return g, labels - - def create_dgl_graph(self, graph_name, graph_attributes): - print("Creating DGL graph...") - g, labels = self.create_networkx_graph(graph_name, graph_attributes) - print("done!") - - return g, labels diff --git a/adbnx_adapter/adbnx_adapter/imdb_arangoDB_networkx_adapter.py b/adbnx_adapter/adbnx_adapter/imdb_arangoDB_networkx_adapter.py deleted file mode 100644 index d5aee911..00000000 --- a/adbnx_adapter/adbnx_adapter/imdb_arangoDB_networkx_adapter.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Mar 26 09:51:47 2020 - -@author: Rajiv Sambasivan -""" - - -from adbnx_adapter.arangoDB_networkx_adapter import ArangoDB_Networkx_Adapter -import networkx as nx - - -class IMDBArangoDB_Networkx_Adapter(ArangoDB_Networkx_Adapter): - - def create_networkx_graph(self, graph_name, graph_attributes, **query_options): - - if self.is_valid_graph_attributes(graph_attributes): - g = 
nx.DiGraph() - for k, v in graph_attributes['vertexCollections'].items(): - query = "FOR doc in %s " % (k) - cspl = [s + ':' + 'doc.' + s for s in v] - cspl.append('_id: doc._id') - csps = ','.join(cspl) - query = query + "RETURN { " + csps + "}" - - cursor = self.db.aql.execute(query, **query_options) - for doc in cursor: - if k == "Users": - bip_key = 0 - else: - bip_key = 1 - g.add_node(doc['_id'], attr_dict=doc, bipartite=bip_key) - - for k, v in graph_attributes['edgeCollections'].items(): - query = "FOR doc in %s " % (k) - cspl = [s + ':' + 'doc.' + s for s in v] - cspl.append('_id: doc._id') - csps = ','.join(cspl) - query = query + "RETURN { " + csps + "}" - - cursor = self.db.aql.execute(query, **query_options) - - for doc in cursor: - g.add_edge(doc['_from'], doc['_to']) - - return g diff --git a/adbnx_adapter/requirements.txt b/adbnx_adapter/requirements.txt index 91e7dd14..157a50ee 100644 --- a/adbnx_adapter/requirements.txt +++ b/adbnx_adapter/requirements.txt @@ -1,2 +1,4 @@ -python-arango -PyYAML==5.1.1 +networkx==2.6.3 +python-arango==7.2.0 +pytest==6.2.5 +overrides==6.1.0 \ No newline at end of file diff --git a/adbnx_adapter/setup.cfg b/adbnx_adapter/setup.cfg index b88034e4..656ba988 100644 --- a/adbnx_adapter/setup.cfg +++ b/adbnx_adapter/setup.cfg @@ -1,2 +1,7 @@ [metadata] description-file = README.md + +[tool:pytest] +markers = + unit: Marks a unit test +testpaths = tests \ No newline at end of file diff --git a/adbnx_adapter/setup.py b/adbnx_adapter/setup.py index 9ad355d7..0186c5f0 100644 --- a/adbnx_adapter/setup.py +++ b/adbnx_adapter/setup.py @@ -1,25 +1,39 @@ -from setuptools import setup import pathlib - +from setuptools import setup # The directory containing this file HERE = pathlib.Path(__file__).resolve().parents[1] +with open("../README.md", "r") as f: + long_description = f.read() # This call to setup() does all the work setup( name="adbnx_adapter", - version="0.0.0.2.5.3-1", - description="package for creating networkx adapters 
for arangodb", - long_description="package for creating networkx adapters for arangodb", - long_description_content_type="text/markdown", - url="https://github.com/arangoml/networkx-adapter", author="ArangoDB", author_email="rajiv@arangodb.com", - license="Apache", + version="1.0.0", + description="Convert ArangoDB graphs to NetworkX & vice-versa.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/arangoml/networkx-adapter", + packages=["adbnx_adapter"], + include_package_data=True, + python_requires=">=3.6", + license="Apache Software License", + install_requires=["python-arango", "networkx", "overrides"], + tests_require=["pytest", "pytest-cov"], classifiers=[ + "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7"], - packages=["adbnx_adapter"], - include_package_data=True) + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Utilities", + "Typing :: Typed", + ], +) diff --git a/examples/tools/arangorestore b/adbnx_adapter/tests/arangorestore similarity index 58% rename from examples/tools/arangorestore rename to adbnx_adapter/tests/arangorestore index 8b0b39f4..a7c18c09 100755 Binary files a/examples/tools/arangorestore and b/adbnx_adapter/tests/arangorestore differ diff --git a/adbnx_adapter/tests/conftest.py b/adbnx_adapter/tests/conftest.py new file mode 100755 index 00000000..5c9c992c --- /dev/null +++ b/adbnx_adapter/tests/conftest.py @@ -0,0 +1,171 @@ +import io +import os +import time +import json +import zipfile +import requests +import subprocess +from pathlib import Path +import urllib.request as urllib + +import networkx as nx +from adbnx_adapter.adbnx_controller 
import Base_ADBNX_Controller +from adbnx_adapter.adbnx_adapter import ArangoDB_Networkx_Adapter + +PROJECT_DIR = Path(__file__).parent.parent.parent + + +def pytest_sessionstart(): + global con + con = get_oasis_crendetials() + print_connection_details(con) + time.sleep(5) # Enough for the oasis instance to be ready. + + global adbnx_adapter, imdb_adbnx_adapter, grid_adbnx_adapter, football_adbnx_adapter, karate_adbnx_adapter + adbnx_adapter = ArangoDB_Networkx_Adapter(con) + imdb_adbnx_adapter = ArangoDB_Networkx_Adapter(con, IMDB_ADBNX_Controller) + grid_adbnx_adapter = ArangoDB_Networkx_Adapter(con, Grid_ADBNX_Controller) + football_adbnx_adapter = ArangoDB_Networkx_Adapter(con, Football_ADBNX_Controller) + karate_adbnx_adapter = ArangoDB_Networkx_Adapter(con, Karate_ADBNX_Controller) + + arango_restore("examples/data/fraud_dump") + arango_restore("examples/data/imdb_dump") + + edge_definitions = [ + { + "edge_collection": "accountHolder", + "from_vertex_collections": ["customer"], + "to_vertex_collections": ["account"], + }, + { + "edge_collection": "transaction", + "from_vertex_collections": ["account"], + "to_vertex_collections": ["account"], + }, + ] + adbnx_adapter.db.create_graph("fraud-detection", edge_definitions=edge_definitions) + + +def get_oasis_crendetials() -> dict: + url = "https://tutorials.arangodb.cloud:8529/_db/_system/tutorialDB/tutorialDB" + request = requests.post(url, data=json.dumps("{}")) + if request.status_code != 200: + raise Exception("Error retrieving login data.") + + return json.loads(request.text) + + +def arango_restore(path_to_data): + restore_prefix = "./" if os.getenv("GITHUB_ACTIONS") else "" # temporary hack + + subprocess.check_call( + f'chmod -R 755 ./arangorestore && {restore_prefix}arangorestore -c none --server.endpoint http+ssl://{con["hostname"]}:{con["port"]} --server.username {con["username"]} --server.database {con["dbName"]} --server.password {con["password"]} --default-replication-factor 3 --input-directory 
"{PROJECT_DIR}/{path_to_data}"', + cwd=f"{PROJECT_DIR}/adbnx_adapter/tests", + shell=True, + ) + + +def print_connection_details(con): + print("----------------------------------------") + print("https://{}:{}".format(con["hostname"], con["port"])) + print("Username: " + con["username"]) + print("Password: " + con["password"]) + print("Database: " + con["dbName"]) + print("----------------------------------------") + + +class IMDB_ADBNX_Controller(Base_ADBNX_Controller): + def _prepare_adb_vertex(self, vertex: dict, collection: str): + vertex["bipartite"] = 0 if collection == "Users" else 1 + return vertex["_id"] + + +class Grid_ADBNX_Controller(Base_ADBNX_Controller): + def _prepare_adb_vertex(self, vertex: dict, collection: str): + nx_id = tuple( + int(n) + for n in tuple( + vertex["_key"], + ) + ) + return nx_id + + def _identify_nx_node(self, id: tuple, node: dict, overwrite: bool) -> str: + return "Grid_Node" # Only one node collection in this dataset + + def _keyify_nx_node( + self, id: tuple, node: dict, collection: str, overwrite: bool + ) -> str: + return self._tuple_to_arangodb_key_helper(id) + + def _identify_nx_edge( + self, edge: dict, from_node: dict, to_node: dict, overwrite: bool + ) -> str: + from_collection = self.adb_map.get(from_node["id"])["collection"] + to_collection = self.adb_map.get(to_node["id"])["collection"] + + if from_collection == to_collection == "Grid_Node": + return "to" + + return "Unknown_Edge" + + +def get_grid_graph(): + return nx.grid_2d_graph(5, 5) + + +class Football_ADBNX_Controller(Base_ADBNX_Controller): + def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str: + return "Football_Team" # Only one node collection in this dataset= + + def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str: + return self._string_to_arangodb_key_helper(id) + + def _identify_nx_edge( + self, edge: dict, from_node: dict, to_node: dict, overwrite: bool + ) -> str: + from_collection = 
self.adb_map.get(from_node["id"])["collection"] + to_collection = self.adb_map.get(to_node["id"])["collection"] + + if from_collection == to_collection == "Football_Team": + return "played" + + return "Unknown_Edge" + + +def get_football_graph(): + url = "http://www-personal.umich.edu/~mejn/netdata/football.zip" + sock = urllib.urlopen(url) + s = io.BytesIO(sock.read()) + sock.close() + zf = zipfile.ZipFile(s) + gml = zf.read("football.gml").decode() + gml = gml.split("\n")[1:] + return nx.parse_gml(gml) + + +class Karate_ADBNX_Controller(Base_ADBNX_Controller): + def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str: + return "Karate_Student" + + def _identify_nx_edge( + self, edge: dict, from_node: dict, to_node: dict, overwrite: bool + ) -> str: + from_collection = self.adb_map.get(from_node["id"])["collection"] + to_collection = self.adb_map.get(to_node["id"])["collection"] + + if from_collection == to_collection == "Karate_Student": + return "knows" + + return "Unknown_Edge" + + def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str: + return str(id) # In this case the id is an integer + + +def get_karate_graph(): + karate_nx_g = nx.karate_club_graph() + for id, node in karate_nx_g.nodes(data=True): + node["degree"] = karate_nx_g.degree(id) + + return karate_nx_g diff --git a/adbnx_adapter/tests/test_adbnx_adapter.py b/adbnx_adapter/tests/test_adbnx_adapter.py new file mode 100644 index 00000000..9a87d6f4 --- /dev/null +++ b/adbnx_adapter/tests/test_adbnx_adapter.py @@ -0,0 +1,337 @@ +import pytest +from conftest import ( + nx, + ArangoDB_Networkx_Adapter, + Base_ADBNX_Controller, + get_grid_graph, + get_football_graph, + get_karate_graph, + adbnx_adapter, + imdb_adbnx_adapter, + grid_adbnx_adapter, + football_adbnx_adapter, + karate_adbnx_adapter, +) + +from arango.graph import Graph as ArangoGraph +from networkx.classes.graph import Graph as NxGraph + + +@pytest.mark.unit +@pytest.mark.parametrize( + 
"bad_connection", + [ + { + "dbName": "_system", + "hostname": "localhost", + "protocol": "http", + "port": 8529, + # "username": "root", + # "password": "password", + } + ], +) +def test_validate_attributes(bad_connection): + with pytest.raises(ValueError): + ArangoDB_Networkx_Adapter(bad_connection) + + +@pytest.mark.unit +@pytest.mark.parametrize( + "adapter, name, attributes", + [ + ( + adbnx_adapter, + "fraud-detection", + { + "vertexCollections": { + "account": {"Balance", "account_type", "customer_id", "rank"}, + "bank": {"Country", "Id", "bank_id", "bank_name"}, + "branch": { + "City", + "Country", + "Id", + "bank_id", + "branch_id", + "branch_name", + }, + "Class": {"concrete", "label", "name"}, + "customer": {"Name", "Sex", "Ssn", "rank"}, + }, + "edgeCollections": { + "accountHolder": {"_from", "_to"}, + "Relationship": { + "_from", + "_to", + "label", + "name", + "relationshipType", + }, + "transaction": {"_from", "_to"}, + }, + }, + ), + ( + imdb_adbnx_adapter, + "IMDBGraph", + { + "vertexCollections": {"Users": {}, "Movies": {}}, + "edgeCollections": {"Ratings": {"_from", "_to", "ratings"}}, + }, + ), + ], +) +def test_create_networkx_graph( + adapter: ArangoDB_Networkx_Adapter, name: str, attributes: dict +): + assert_adapter_type(adapter) + nx_g = adapter.create_networkx_graph(name, attributes) + assert_networkx_data( + nx_g, + attributes["vertexCollections"], + attributes["edgeCollections"], + ) + + +@pytest.mark.unit +@pytest.mark.parametrize( + "adapter, name, vcols, ecols", + [ + ( + adbnx_adapter, + "fraud-detection", + {"account", "bank", "branch", "Class", "customer"}, + {"accountHolder", "Relationship", "transaction"}, + ) + ], +) +def test_create_networkx_graph_from_arangodb_collections( + adapter: ArangoDB_Networkx_Adapter, name: str, vcols: set, ecols: set +): + assert_adapter_type(adapter) + nx_g = adapter.create_networkx_graph_from_arangodb_collections( + name, + vcols, + ecols, + ) + assert_networkx_data(nx_g, vcols, ecols) + + 
+@pytest.mark.unit +@pytest.mark.parametrize( + "adapter, name, edge_definitions", + [(adbnx_adapter, "fraud-detection", None)], +) +def test_create_networkx_graph_from_arangodb_graph( + adapter: ArangoDB_Networkx_Adapter, name: str, edge_definitions +): + assert_adapter_type(adapter) + + # Re-create the graph if defintions are provided + if edge_definitions: + adapter.db.delete_graph(name, ignore_missing=True) + adapter.db.create_graph(name, edge_definitions=edge_definitions) + + arango_graph = adapter.db.graph(name) + v_cols = arango_graph.vertex_collections() + e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()} + + nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph(name) + assert_networkx_data(nx_g, v_cols, e_cols) + + +@pytest.mark.unit +@pytest.mark.parametrize( + "adapter, name, nx_g, edge_definitions", + [ + ( + grid_adbnx_adapter, + "Grid", + get_grid_graph(), + [ + { + "edge_collection": "to", + "from_vertex_collections": ["Grid_Node"], + "to_vertex_collections": ["Grid_Node"], + } + ], + ), + ( + football_adbnx_adapter, + "Football", + get_football_graph(), + [ + { + "edge_collection": "played", + "from_vertex_collections": ["Football_Team"], + "to_vertex_collections": ["Football_Team"], + } + ], + ), + ( + karate_adbnx_adapter, + "Karate", + get_karate_graph(), + [ + { + "edge_collection": "knows", + "from_vertex_collections": ["Karate_Student"], + "to_vertex_collections": ["Karate_Student"], + } + ], + ), + ], +) +def test_create_arangodb_graph( + adapter: ArangoDB_Networkx_Adapter, + name: str, + nx_g: NxGraph, + edge_definitions: list, +): + assert_adapter_type(adapter) + adb_g = adapter.create_arangodb_graph(name, nx_g, edge_definitions) + assert_arangodb_data(adapter, nx_g, adb_g) + + +@pytest.mark.unit +def test_full_cycle_from_arangodb(): + name = "fraud-detection" + original_fraud_adb_g = adbnx_adapter.db.graph(name) + fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph(name) + + 
edge_definitions = [ + { + "edge_collection": "accountHolder_nx", + "from_vertex_collections": ["customer_nx"], + "to_vertex_collections": ["account_nx"], + }, + { + "edge_collection": "transaction_nx", + "from_vertex_collections": ["account_nx"], + "to_vertex_collections": ["account_nx"], + }, + ] + + new_name = name + "-nx" + new_fraud_adb_g = adbnx_adapter.create_arangodb_graph( + new_name, fraud_nx_g, edge_definitions, keyify_edges=True + ) + + col: str + for col in original_fraud_adb_g.vertex_collections(): + new_col = col + "_nx" + for vertex in original_fraud_adb_g.vertex_collection(col): + assert new_fraud_adb_g.vertex_collection(new_col).has(vertex["_key"]) + + e_cols = {col["edge_collection"] for col in original_fraud_adb_g.edge_definitions()} + for col in e_cols: + new_col = col + "_nx" + for edge in original_fraud_adb_g.edge_collection(col): + assert new_fraud_adb_g.edge_collection(new_col).has(edge["_key"]) + + +@pytest.mark.unit +def test_full_cycle_from_arangodb_with_overwrite(): + name = "fraud-detection" + original_fraud_adb_g = adbnx_adapter.db.graph(name) + edge_definitions = original_fraud_adb_g.edge_definitions() + + col: str + original_doc_count = dict() + for col in original_fraud_adb_g.vertex_collections(): + original_doc_count[col] = original_fraud_adb_g.vertex_collection(col).count() + + e_cols = {col["edge_collection"] for col in original_fraud_adb_g.edge_definitions()} + for col in e_cols: + original_doc_count[col] = original_fraud_adb_g.edge_collection(col).count() + + fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph(name) + + for _, node in fraud_nx_g.nodes(data=True): + node["new_vertex_data"] = ["new", "vertex", "data", "here"] + + for _, _, edge in fraud_nx_g.edges(data=True): + edge["new_edge_data"] = ["new", "edge", "data", "here"] + + updated_fraud_adb_g = adbnx_adapter.create_arangodb_graph( + name, fraud_nx_g, edge_definitions, overwrite=True, keyify_edges=True + ) + + for col in 
updated_fraud_adb_g.vertex_collections(): + new_doc_count = updated_fraud_adb_g.vertex_collection(col).count() + assert original_doc_count[col] == new_doc_count + for vertex in updated_fraud_adb_g.vertex_collection(col): + assert "new_vertex_data" in vertex + + e_cols = {col["edge_collection"] for col in updated_fraud_adb_g.edge_definitions()} + for col in e_cols: + new_doc_count = updated_fraud_adb_g.edge_collection(col).count() + assert original_doc_count[col] == new_doc_count + for edge in updated_fraud_adb_g.edge_collection(col): + assert "new_edge_data" in edge + + +@pytest.mark.unit +def test_full_cycle_from_networkx(): + name = "Grid" + if grid_adbnx_adapter.db.has_graph(name): + grid_adbnx_adapter.db.delete_graph(name, drop_collections=True) + + original_grid_nx_g = nx.grid_2d_graph(5, 5) + grid_edge_definitions = [ + { + "edge_collection": "to", + "from_vertex_collections": ["Grid_Node"], + "to_vertex_collections": ["Grid_Node"], + } + ] + + grid_adbnx_adapter.create_arangodb_graph( + name, original_grid_nx_g, grid_edge_definitions + ) + + new_grid_nx_g = grid_adbnx_adapter.create_networkx_graph_from_arangodb_graph(name) + + for id, _ in original_grid_nx_g.nodes(data=True): + assert new_grid_nx_g.has_node(id) + + for from_node, to_node, _ in original_grid_nx_g.edges(data=True): + assert new_grid_nx_g.has_edge(from_node, to_node) + + +def assert_adapter_type(adapter: ArangoDB_Networkx_Adapter): + assert type(adapter) is ArangoDB_Networkx_Adapter and issubclass( + type(adapter.cntrl), Base_ADBNX_Controller + ) + + +def assert_networkx_data(nx_g: NxGraph, v_cols, e_cols): + for col in v_cols: + for vertex in adbnx_adapter.db.collection(col): + assert nx_g.has_node(vertex["_id"]) + + for col in e_cols: + for edge in adbnx_adapter.db.collection(col): + assert nx_g.has_edge(edge["_from"], edge["_to"]) + + +def assert_arangodb_data( + adapter: ArangoDB_Networkx_Adapter, nx_g: NxGraph, adb_g: ArangoGraph +): + overwrite = False + for id, node in 
nx_g.nodes(data=True): + col = adapter.cntrl._identify_nx_node(id, node, overwrite) + key = adapter.cntrl._keyify_nx_node(id, node, col, overwrite) + assert adb_g.vertex_collection(col).has(key) + + for from_node_id, to_node_id, edge in nx_g.edges(data=True): + from_node = {"id": from_node_id, **nx_g.nodes[from_node_id]} + to_node = {"id": to_node_id, **nx_g.nodes[to_node_id]} + + col = adapter.cntrl._identify_nx_edge(edge, from_node, to_node, overwrite) + assert adb_g.edge_collection(col).find( + { + "_from": adapter.cntrl.adb_map.get(from_node["id"])["_id"], + "_to": adapter.cntrl.adb_map.get(to_node["id"])["_id"], + } + ) diff --git a/examples/ArangoDB_NetworkxAdapter.ipynb b/examples/ArangoDB_NetworkxAdapter.ipynb index fa197fa2..35806984 100644 --- a/examples/ArangoDB_NetworkxAdapter.ipynb +++ b/examples/ArangoDB_NetworkxAdapter.ipynb @@ -34,7 +34,7 @@ "id": "bpvZS-1aeG89" }, "source": [ - "Version: 0.0.0.2.5.3\n", + "Version: 1.0.0\n", "\n", "In this Notebook we learn how to export Graphs from [ArangoDB](https://www.arangodb.com/), a multi-model Graph Database into [NetworkX](https://networkx.github.io/), the swiss army knife for graph analysis ion python." 
] @@ -58,14 +58,12 @@ "source": [ "%%capture\n", "!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n", - "!git clone -b 0.0.0.2.5.3 --single-branch https://github.com/arangoml/networkx-adapter.git\n", + "!git clone -b 1.0.0 --single-branch https://github.com/arangoml/networkx-adapter.git\n", "!rsync -av networkx-adapter/examples/ ./ --exclude=.git\n", "!rsync -av interactive_tutorials/ ./ --exclude=.git\n", - "!pip3 install adbnx_adapter==0.0.0.2.5.3.post1\n", - "!pip3 install networkx\n", + "!pip3 install adbnx_adapter==1.0.0\n", "!pip3 install matplotlib\n", - "!pip3 install pyarango\n", - "!pip3 install python-arango" + "!pip3 install pyArango" ] }, { @@ -82,7 +80,8 @@ "import matplotlib.pyplot as plt\n", "\n", "\n", - "from adbnx_adapter.arangoDB_networkx_adapter import ArangoDB_Networkx_Adapter" + "from adbnx_adapter.adbnx_adapter import ArangoDB_Networkx_Adapter\n", + "from adbnx_adapter.adbnx_controller import Base_ADBNX_Controller" ] }, { @@ -102,16 +101,18 @@ "base_uri": "https://localhost:8080/" }, "id": "2ekGwnJDeG8-", - "outputId": "c0839114-a489-4fba-d438-198d930cdb90" + "outputId": "8323114f-e171-436d-8105-216db7176032" }, "outputs": [], "source": [ "# Request temporary instance from the managed ArangoDB Cloud Oasis.\n", "con = oasis.getTempCredentials()\n", "\n", - "# Connect the driver to the temp database\n", - "conn = oasis.connect(con)\n", - "db = conn[con[\"dbName\"]]\n", + "# Connect to the db via the python-arango driver\n", + "python_arango_db_driver = oasis.connect_python_arango(con)\n", + "\n", + "# (Alternative) Connect to the db via the pyArango driver\n", + "# pyarango_db_driver = oasis.connect(con)[con['dbName']]\n", "\n", "print()\n", "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", @@ -164,12 +165,13 @@ "base_uri": "https://localhost:8080/" }, "id": "7bgGJ3QkeG8_", - "outputId": "4715de16-766a-4902-a132-d53bc59d4f63" + "outputId": 
"c8b0f442-d0bc-4ff5-c199-2a7efb610417" }, "outputs": [], "source": [ "!chmod -R 755 ./tools\n", - "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --default-replication-factor 3 --input-directory \"data/fraud_dump\"" + "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --default-replication-factor 3 --input-directory \"data/fraud_dump\"\n", + "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --default-replication-factor 3 --input-directory \"data/imdb_dump\"" ] }, { @@ -207,39 +209,29 @@ "base_uri": "https://localhost:8080/" }, "id": "PybHP7jpeG8_", - "outputId": "0c59cdb9-e67c-4e18-9791-69fdb4edbd6a" + "outputId": "2480707c-c9dd-43fc-8914-789b8b851596" }, "outputs": [], "source": [ - "from pyArango.collection import Collection, Edges, Field\n", - "from pyArango.graph import Graph, EdgeDefinition\n", - "\n", + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"accountHolder\",\n", + " \"from_vertex_collections\": [\"customer\"],\n", + " \"to_vertex_collections\": [\"account\"],\n", + " },\n", + " {\n", + " \"edge_collection\": \"transaction\",\n", + " \"from_vertex_collections\": [\"account\"],\n", + " \"to_vertex_collections\": [\"account\"],\n", + " },\n", + "]\n", "\n", - "class account(Collection):\n", - " _fields = {\n", - " \"Name\": Field()\n", - " }\n", - " \n", - "class customer(Collection):\n", - " _fields = {\n", - " \"Name\": Field()\n", - " }\n", - " \n", - "class transaction(Edges): \n", - " _fields = {\n", - " }\n", + "name = \"fraud-detection\"\n", + 
"python_arango_db_driver.delete_graph(name, ignore_missing=True)\n", + "fraud_graph = python_arango_db_driver.create_graph(name, edge_definitions=edge_definitions)\n", "\n", - "class accountHolder(Edges): \n", - " _fields = {\n", - " }\n", - "\n", - "class FraudDetection(Graph) :\n", - " _edgeDefinitions = [EdgeDefinition(\"accountHolder\", fromCollections=[\"customer\"], toCollections=[\"account\"]),EdgeDefinition(\"transaction\", fromCollections=[\"account\"], toCollections=[\"account\"])]\n", - " _orphanedCollections = []\n", - "\n", - "fraudGraph = db.createGraph(\"FraudDetection\")\n", - "\n", - "print(\"Collection/Graph Setup done.\")" + "print(\"Graph Setup done.\")\n", + "print(fraud_graph)" ] }, { @@ -257,7 +249,7 @@ "id": "QfE_tKxneG9A" }, "source": [ - "# Connect ArangoDB and NetworkX " + "# Create Adapter" ] }, { @@ -266,18 +258,22 @@ "id": "kGfhzPT9eG9A" }, "source": [ - "We first connect the ArangoDB_Networkx_Adapter to our temp ArangoDB cluster:" + "Connect the ArangoDB_Networkx_Adapter to our temp ArangoDB cluster:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "oG496kBeeG9A" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oG496kBeeG9A", + "outputId": "9b2a2eb0-6973-43ab-ce9d-13f9dd4cf840" }, "outputs": [], "source": [ - "ma = ArangoDB_Networkx_Adapter(conn = con)" + "adbnx_adapter = ArangoDB_Networkx_Adapter(con)" ] }, { @@ -286,58 +282,107 @@ "id": "uByvwf9feG9A" }, "source": [ - "Next, we need to define the attributes in the vertex and edge collections to be included:\n", - "\n", - "*Note, we are currently working on making this step optional in the future!*" + "# ArangoDB to NetworkX\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "umy25EsUU6Lg" + }, + "source": [ + "## Via ArangoDB Attributes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "UWX9-MsKeG9A" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 392 + }, + "id": 
"UWX9-MsKeG9A", + "outputId": "08424915-0f86-4033-ea9a-6e9b540298f8" }, "outputs": [], "source": [ - "fraud_detection_attributes = { 'vertexCollections': \n", - " {'account': {'Balance', 'account_type', 'customer_id', 'rank'},\\\n", - " 'bank': {'Country', 'Id', 'bank_id', 'bank_name'},\\\n", - " 'branch':{'City', 'Country', 'Id', 'bank_id', 'branch_id', 'branch_name'},\\\n", - " 'Class':{'concrete', 'label', 'name'},\\\n", - " 'customer': {'Name', 'Sex', 'Ssn', 'rank'}},\\\n", - " 'edgeCollections' : \n", - " {'accountHolder': {'_from', '_to'},\\\n", - " 'Relationship': {'_from', '_to', 'label', 'name', 'relationshipType'},\\\n", - " 'transaction': {'_from', '_to'}}}" + "# Define attributes\n", + "fraud_detection_attributes = {\n", + " \"vertexCollections\": {\n", + " \"account\": {\"Balance\", \"account_type\", \"customer_id\", \"rank\"},\n", + " \"bank\": {\"Country\", \"Id\", \"bank_id\", \"bank_name\"},\n", + " \"branch\": {\"City\", \"Country\", \"Id\", \"bank_id\", \"branch_id\", \"branch_name\"},\n", + " \"Class\": {\"concrete\", \"label\", \"name\"},\n", + " \"customer\": {\"Name\", \"Sex\", \"Ssn\", \"rank\"},\n", + " },\n", + " \"edgeCollections\": {\n", + " \"accountHolder\": {\"_from\", \"_to\"},\n", + " \"Relationship\": {\"_from\", \"_to\", \"label\", \"name\", \"relationshipType\"},\n", + " \"transaction\": {\"_from\", \"_to\"},\n", + " },\n", + "}\n", + "\n", + "# Create NetworkX Graph from attributes\n", + "nx_g = adbnx_adapter.create_networkx_graph('FraudDetection', fraud_detection_attributes)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# nx_g = adbnx_adapter.create_networkx_graph(graph_name = 'FraudDetection', fraud_detection_attributes, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print(nx_g.nodes(data=True))\n", + "print(nx_g.edges(data=True))\n", + 
"nx.draw(nx_g, with_labels=True)" ] }, { "cell_type": "markdown", "metadata": { - "id": "5pC59IV-eG9A" + "id": "RQ4CknYfUEuz" }, "source": [ - "Now, we can export the networkX graph:" + "## Via ArangoDB Collections" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "hPp6n66reG9A" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 392 + }, + "id": "i4XOpdRLUNlJ", + "outputId": "9f62d623-bbed-489e-aecc-266cf7bdee3f" }, "outputs": [], "source": [ - "g = ma.create_networkx_graph(graph_name = 'FraudDetection', graph_attributes = fraud_detection_attributes)" + "# Define collection\n", + "vertex_collections = {\"account\", \"bank\", \"branch\", \"Class\", \"customer\"}\n", + "edge_collections = {\"accountHolder\", \"Relationship\", \"transaction\"}\n", + "\n", + "# Create NetworkX graph from ArangoDB collections\n", + "nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_collections(\"fraud-detection\", vertex_collections, edge_collections)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_collections(\"fraud-detection\", vertex_collections, edge_collections, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print(nx_g.nodes(data=True))\n", + "print(nx_g.edges(data=True))\n", + "nx.draw(nx_g, with_labels=True)" ] }, { "cell_type": "markdown", "metadata": { - "id": "gsDza0PBeG9A" + "id": "ZrEDmtqCVD0W" }, "source": [ - "From here on we can simply use all networkX functionality:" + "## Via ArangoDB Graph" ] }, { @@ -346,14 +391,36 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 319 + "height": 392 }, - "id": "cMlIdO9NeG9A", - "outputId": "e41f8e2a-b0d3-4009-f95e-083afff1b213" + "id": "zZ-Hu3lLVHgd", + "outputId": "edfc2a89-7fe2-4463-925d-e1a2940cbf63" }, "outputs": [], 
"source": [ - "nx.draw(g, with_labels=True)" + "# Define graph name\n", + "graph_name = \"fraud-detection\"\n", + "\n", + "# Create NetworkX graph from ArangoDB graph\n", + "nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph(graph_name)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_collections(graph_name, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print(nx_g.nodes(data=True))\n", + "print(nx_g.edges(data=True))\n", + "nx.draw(nx_g, with_labels=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tWU1YW9AViTA" + }, + "source": [ + "## Via ArangoDB Attributes with a customized controller" ] }, { @@ -363,12 +430,154 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "id": "KJmyw3JAeG9A", - "outputId": "d53d232d-132b-490d-8a3b-67607489b168" + "id": "QqGgOe51Vr85", + "outputId": "93ae3a05-1950-44c3-aa7a-15e039cd23cc" + }, + "outputs": [], + "source": [ + "# Introduce the new controller class\n", + "class IMDB_ADBNX_Controller(Base_ADBNX_Controller):\n", + " # We re-define how vertex pre-insertion should be treated, specifically for the IMDB dataset.\n", + " def _prepare_adb_vertex(self, vertex: dict, collection: str):\n", + " \"\"\"\n", + " Given an ArangoDB vertex, you can modify it before it gets inserted into the NetworkX graph, \n", + " and/or derive a custom node id for networkx to use.\n", + "\n", + " In most cases, it is only required to return the ArangoDB _id of the vertex.\n", + " \"\"\"\n", + " vertex[\"bipartite\"] = 0 if collection == \"Users\" else 1 # The new change\n", + " return vertex[\"_id\"] # This is standard\n", + "\n", + " # We're not interested in re-defining pre-insertion handling for edges, so we leave it be\n", + " # def _prepare_adb_edge(self, edge: dict, 
collection: str):\n", + " # return super()._prepare_adb_edge(edge, collection)\n", + "\n", + "# Instantiate the adapter\n", + "imdb_adbnx_adapter = ArangoDB_Networkx_Adapter(con, IMDB_ADBNX_Controller)\n", + "\n", + "# Define attributes\n", + "imdb_attributes = {\n", + " \"vertexCollections\": {\"Users\": {}, \"Movies\": {}},\n", + " \"edgeCollections\": {\"Ratings\": {\"_from\", \"_to\", \"ratings\"}},\n", + "}\n", + "\n", + "# Create NetworkX Graph from attributes using the custom IMDB_ArangoDB_Networx_Adapter\n", + "nx_g = imdb_adbnx_adapter.create_networkx_graph(\"IMDBGraph\", imdb_attributes)\n", + "\n", + "# You can also provide valid Python-Arango AQL query options to the command above, like such:\n", + "# nx_g = imdb_adbnx_adapter.create_networkx_graph(\"IMDBGraph\", imdb_attributes, ttl=1000, stream=True)\n", + "# See more here: https://docs.python-arango.com/en/main/specs.html#arango.aql.AQL.execute\n", + "\n", + "# Show graph data\n", + "print(nx_g.nodes(data=True))\n", + "# print(nx_g.edges(data=True)) # (will exceed IOPub data rate)\n", + "# nx.draw(nx_g, with_labels=True) # (will exceed IOPub data rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bvzJXSHHTi3v" + }, + "source": [ + "# NetworkX to ArangoDB" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UafSB_3JZNwK" + }, + "source": [ + "## Example 1: NetworkX Grid Graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 390 + }, + "id": "eRVbiBy4ZdE4", + "outputId": "98409e3a-8871-44db-99c6-f8626d5c2196" }, "outputs": [], "source": [ - "g.nodes()" + "# Load the nx graph & draw\n", + "grid_nx_g = nx.grid_2d_graph(5, 5)\n", + "nx.draw(grid_nx_g, with_labels=True)\n", + "\n", + "# Define edge defintions for the ArangoDB graph to understand\n", + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"to\",\n", + " \"from_vertex_collections\": [\"Grid_Node\"],\n", + 
" \"to_vertex_collections\": [\"Grid_Node\"],\n", + " }\n", + "]\n", + "\n", + "# Introduce the new controller class\n", + "class Grid_ADBNX_Controller(Base_ADBNX_Controller):\n", + " def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str:\n", + " \"\"\"\n", + " Given a NetworkX node, identify what ArangoDB collection should it belong to.\n", + "\n", + " NOTE: If your NetworkX graph does not comply to ArangoDB standards\n", + " (i.e a node's ID is not \"collection/key\"), then you must override this function.\n", + " \"\"\"\n", + " return \"Grid_Node\" # Only one node collection in this dataset\n", + "\n", + " def _identify_nx_edge(self, edge: dict, from_node: dict, to_node: dict, overwrite: bool) -> str:\n", + " \"\"\"\n", + " Given a NetworkX edge, its pair of nodes, and the overwrite boolean,\n", + " identify what ArangoDB collection should it belong to.\n", + "\n", + " NOTE: If your NetworkX graph does not comply to ArangoDB standards\n", + " (i.e a node's ID is not \"collection/key\"), then you must override this function.\n", + " \"\"\"\n", + " from_collection = self.adb_map.get(from_node[\"id\"])[\"collection\"]\n", + " to_collection = self.adb_map.get(to_node[\"id\"])[\"collection\"]\n", + "\n", + " if from_collection == to_collection == \"Grid_Node\":\n", + " return \"to\"\n", + "\n", + " return \"Unknown_Edge\"\n", + " \n", + " def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str:\n", + " \"\"\"\n", + " Given a NetworkX node, derive its valid ArangoDB key.\n", + "\n", + " NOTE: If your NetworkX graph does not comply to ArangoDB standards\n", + " (i.e a node's ID is not \"collection/key\"), then you must override this function.\n", + " \"\"\"\n", + " # Since our NetworkX nodes have an id of type tuple, we can use the existing helper function.\n", + " return self._tuple_to_arangodb_key_helper(id)\n", + "\n", + "\n", + "# Instantiate the adapter\n", + "grid_adbnx_adapter = ArangoDB_Networkx_Adapter(con, 
Grid_ADBNX_Controller)\n", + "\n", + "# Create the ArangoDB graph\n", + "name = \"Grid\"\n", + "if python_arango_db_driver.has_graph(name):\n", + " python_arango_db_driver.delete_graph(name, drop_collections=True)\n", + "\n", + "grid_adb_g = grid_adbnx_adapter.create_arangodb_graph(name, grid_nx_g, edge_definitions) #, overwrite=True)\n", + "\n", + "\n", + "print(f\"Inspect the graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\")\n", + "print(f\"View the original graph here: https://networkx.org/documentation/stable/auto_examples/basic/plot_read_write.html#sphx-glr-auto-examples-basic-plot-read-write-py)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gshTlSX_ZZsS" + }, + "source": [ + "## Example 2: NetworkX Football Graph" ] }, { @@ -376,14 +585,278 @@ "execution_count": null, "metadata": { "colab": { - "base_uri": "https://localhost:8080/" + "base_uri": "https://localhost:8080/", + "height": 390 }, - "id": "eNxI-ctteG9A", - "outputId": "edaf5f28-22d8-4586-a43e-d955b48b8940" + "id": "dADiexlAioGH", + "outputId": "a3f0bba3-b594-44dd-e9fa-e107dfcdefaa" }, "outputs": [], "source": [ - "g.edges()" + "import io\n", + "import zipfile\n", + "import urllib.request as urllib\n", + "\n", + "# Load the nx graph & draw\n", + "url = \"http://www-personal.umich.edu/~mejn/netdata/football.zip\"\n", + "sock = urllib.urlopen(url)\n", + "s = io.BytesIO(sock.read())\n", + "sock.close()\n", + "zf = zipfile.ZipFile(s)\n", + "gml = zf.read(\"football.gml\").decode()\n", + "gml = gml.split(\"\\n\")[1:]\n", + "\n", + "football_nx_g = nx.parse_gml(gml)\n", + "nx.draw(football_nx_g, with_labels=True)\n", + "\n", + "# Define edge defintions for the ArangoDB graph to understand\n", + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"played\",\n", + " \"from_vertex_collections\": [\"Football_Team\"],\n", + " \"to_vertex_collections\": [\"Football_Team\"],\n", + " }\n", + "]\n", + "\n", + "# 
Introduce the new controller class\n", + "class Football_ADBNX_Controller(Base_ADBNX_Controller):\n", + " def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str:\n", + " return \"Football_Team\" # Only one node collection in this dataset\n", + "\n", + " def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str:\n", + " return self._string_to_arangodb_key_helper(id)\n", + "\n", + " def _identify_nx_edge(self, edge: dict, from_node: dict, to_node: dict, overwrite: bool) -> str:\n", + " from_collection = self.adb_map.get(from_node[\"id\"])[\"collection\"]\n", + " to_collection = self.adb_map.get(to_node[\"id\"])[\"collection\"]\n", + "\n", + " if from_collection == to_collection == \"Football_Team\":\n", + " return \"played\"\n", + "\n", + " return \"Unknown_Edge\"\n", + "\n", + "\n", + "# Instantiate the adapter\n", + "football_adbnx_adapter = ArangoDB_Networkx_Adapter(con, Football_ADBNX_Controller)\n", + "\n", + "# Create the ArangoDB graph\n", + "name = \"Football\"\n", + "if python_arango_db_driver.has_graph(name):\n", + " python_arango_db_driver.delete_graph(name, drop_collections=True)\n", + "\n", + "football_adb_g = football_adbnx_adapter.create_arangodb_graph(name, football_nx_g, edge_definitions) #, overwrite=True)\n", + "\n", + "\n", + "print(f\"Inspect the graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\")\n", + "print(f\"View the original graph here: https://networkx.org/documentation/stable/auto_examples/graph/plot_football.html#sphx-glr-auto-examples-graph-plot-football-py)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5Zl4fQ1AnC_b" + }, + "source": [ + "# Full Cycles" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qh8bYrIqnHTa" + }, + "source": [ + "## From ArangoDB (ArangoDB to NetworkX to ArangoDB)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 390 + }, + "id": "BbPkJAEEoVjM", + "outputId": "6a5ba977-4b4d-4f04-cd9e-027f71b06261" + }, + "outputs": [], + "source": [ + "name = \"fraud-detection\"\n", + "\n", + "# Start from ArangoDB graph\n", + "original_fraud_adb_g = adbnx_adapter.db.graph(name) \n", + "\n", + "# Create NetworkX graph from ArangoDB graph\n", + "fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph(name)\n", + "nx.draw(fraud_nx_g, with_labels=True)\n", + "\n", + "# Provide edge_definitions (we are preparing to re-translate back to ArangoDB)\n", + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"accountHolder_nx\",\n", + " \"from_vertex_collections\": [\"customer_nx\"],\n", + " \"to_vertex_collections\": [\"account_nx\"],\n", + " },\n", + " {\n", + " \"edge_collection\": \"transaction_nx\",\n", + " \"from_vertex_collections\": [\"account_nx\"],\n", + " \"to_vertex_collections\": [\"account_nx\"],\n", + " },\n", + "]\n", + "\n", + "# Create ArangoDB graph from NetworkX graph\n", + "new_name = name + \"-nx\"\n", + "if python_arango_db_driver.has_graph(new_name):\n", + " python_arango_db_driver.delete_graph(new_name, drop_collections=True)\n", + "\n", + "# Keyify edges to keep the same key values as original (this is optional)\n", + "new_fraud_adb_g = adbnx_adapter.create_arangodb_graph(new_name, fraud_nx_g, edge_definitions, keyify_edges=True) #, overwrite=True)\n", + "\n", + "print(f\"Inspect the new graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{new_name}\")\n", + "print(f\"View the original graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RTNNqQjpneFV" + }, + "source": [ + "## From ArangoDB (ArangoDB to NetworkX to ArangoDB) with overwrite" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 372 + }, + "id": "2wmcH2hgqLQq", + "outputId": "8b22a917-ef90-4d9d-84e3-96df734480f5" + }, + "outputs": [], + "source": [ + "name = \"fraud-detection\"\n", + "\n", + "# Start from ArangoDB graph\n", + "original_fraud_adb_g = adbnx_adapter.db.graph(name) \n", + "\n", + "# Create NetworkX graph from ArangoDB graph\n", + "fraud_nx_g = adbnx_adapter.create_networkx_graph_from_arangodb_graph(name)\n", + "nx.draw(fraud_nx_g, with_labels=True)\n", + "\n", + "# Modify the NetworkX graph\n", + "for _, node in fraud_nx_g.nodes(data=True):\n", + " node[\"new_vertex_data\"] = [\"new\", \"vertex\", \"data\", \"here\"]\n", + "\n", + "for _, _, edge in fraud_nx_g.edges(data=True):\n", + " edge[\"new_edge_data\"] = [\"new\", \"edge\", \"data\", \"here\"]\n", + "\n", + "# Provide edge_definitions (we are preparing to re-translate back to ArangoDB)\n", + "# Notice that we have removed the \"_nx\" suffix, because we want to overwrite.\n", + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"accountHolder\",\n", + " \"from_vertex_collections\": [\"customer\"],\n", + " \"to_vertex_collections\": [\"account\"],\n", + " },\n", + " {\n", + " \"edge_collection\": \"transaction\",\n", + " \"from_vertex_collections\": [\"account\"],\n", + " \"to_vertex_collections\": [\"account\"],\n", + " },\n", + "]\n", + "\n", + "# Create ArangoDB graph from NetworkX graph\n", + "# Keyify edges to keep the same key values as original (this is optional)\n", + "new_fraud_adb_g = adbnx_adapter.create_arangodb_graph(name, fraud_nx_g, edge_definitions, keyify_edges=True, overwrite=True)\n", + "\n", + "print(f\"Inspect the overwritten graph here: https://tutorials.arangodb.cloud:8529/_db/{con['dbName']}/_admin/aardvark/index.html#graph/{name}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uV8hpastnmhg" + }, + "source": [ + "## From NetworkX (NetworkX to ArangoDB to NetworkX)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 463 + }, + "id": "TFSM1Xegq9TR", + "outputId": "59975d47-43c1-4ff2-fe1e-98835ecf7235" + }, + "outputs": [], + "source": [ + "# Load the nx graph\n", + "original_grid_nx_g = nx.grid_2d_graph(5, 5)\n", + "print(original_grid_nx_g.nodes(data=True))\n", + "print(original_grid_nx_g.edges(data=True))\n", + "\n", + "# Re-introduce the Grid controller class\n", + "class Grid_ADBNX_Controller(Base_ADBNX_Controller):\n", + " def _prepare_adb_vertex(self, vertex: dict, collection: str):\n", + " nx_id = tuple(\n", + " int(n)\n", + " for n in tuple(\n", + " vertex[\"_key\"],\n", + " )\n", + " )\n", + " return nx_id\n", + "\n", + " def _identify_nx_node(self, id, node: dict, overwrite: bool) -> str:\n", + " return \"Grid_Node\" # Only one node collection in this dataset\n", + "\n", + " def _keyify_nx_node(self, id, node: dict, collection: str, overwrite: bool) -> str:\n", + " return self._tuple_to_arangodb_key_helper(id)\n", + "\n", + " def _identify_nx_edge(self, edge: dict, from_node: dict, to_node: dict, overwrite: bool) -> str:\n", + " from_collection = self.adb_map.get(from_node[\"id\"])[\"collection\"]\n", + " to_collection = self.adb_map.get(to_node[\"id\"])[\"collection\"]\n", + "\n", + " if from_collection == to_collection == \"Grid_Node\":\n", + " return \"to\"\n", + "\n", + " return \"Unknown_Edge\"\n", + "\n", + "# Re-instantiate the Grid adapter class\n", + "grid_adbnx_adapter = ArangoDB_Networkx_Adapter(con, Grid_ADBNX_Controller)\n", + "\n", + "# Delete the Grid graph if it already exists in ArangoDB\n", + "name = \"Grid\"\n", + "if python_arango_db_driver.has_graph(name):\n", + " python_arango_db_driver.delete_graph(name, drop_collections=True)\n", + "\n", + "# Define edge definitions for the ArangoDB graph to understand\n", + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"to\",\n", + " \"from_vertex_collections\": [\"Grid_Node\"],\n", + " 
\"to_vertex_collections\": [\"Grid_Node\"],\n", + " }\n", + "]\n", + "\n", + "# Create the ArangoDB graph\n", + "grid_adbnx_adapter.create_arangodb_graph(name, original_grid_nx_g, edge_definitions)\n", + "\n", + "# Create the NetworkX graph from the ArangoDB graph\n", + "new_grid_nx_g = grid_adbnx_adapter.create_networkx_graph_from_arangodb_graph(name)\n", + "\n", + "# Draw the new graph\n", + "nx.draw(new_grid_nx_g, with_labels=True)\n", + "print(new_grid_nx_g.nodes(data=True))\n", + "print(new_grid_nx_g.edges(data=True))" ] }, { @@ -409,7 +882,10 @@ ], "metadata": { "colab": { - "name": "ArangoDB_NetworkxAdapter.ipynb", + "collapsed_sections": [ + "k9xyOIQ9eG9A" + ], + "name": "ArangoDB_NetworkxAdapter_v1.ipynb", "provenance": [] }, "kernelspec": { diff --git a/examples/creds.dat b/examples/creds.dat deleted file mode 100644 index c6a4bc6d..00000000 --- a/examples/creds.dat +++ /dev/null @@ -1 +0,0 @@ -{"dbName":"TUTjnhwvs3l7289s7t3peifi","username":"TUTpx7x083gmpeya5xvld55","password":"TUTtkthod042cnc9xyzl5o9n","hostname":"d383fa0b596a.arangodb.cloud","port":8529} \ No newline at end of file diff --git a/examples/data/imdb_dump/ENCRYPTION b/examples/data/imdb_dump/ENCRYPTION new file mode 100644 index 00000000..c86c3f35 --- /dev/null +++ b/examples/data/imdb_dump/ENCRYPTION @@ -0,0 +1 @@ +none \ No newline at end of file diff --git a/examples/data/imdb_dump/Movies.structure.json b/examples/data/imdb_dump/Movies.structure.json new file mode 100644 index 00000000..eb9d80c3 --- /dev/null +++ b/examples/data/imdb_dump/Movies.structure.json @@ -0,0 +1 @@ 
+{"allInSync":true,"indexes":[],"isReady":true,"parameters":{"cacheEnabled":false,"deleted":false,"distributeShardsLike":"_graphs","globallyUniqueId":"c2730595280/","id":"2730595280","isDisjoint":false,"isSmart":false,"isSmartChild":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional"},"minReplicationFactor":1,"name":"Movies","numberOfShards":1,"planId":"2730595280","replicationFactor":3,"schema":null,"shardKeys":["_key"],"shardingStrategy":"hash","shards":{"s2730595281":["PRMR-1vqwuhks","PRMR-bvgkeorm","PRMR-zpamyasv"]},"status":3,"type":2,"waitForSync":false,"writeConcern":1},"planVersion":10402} \ No newline at end of file diff --git a/examples/data/imdb_dump/Movies_80662e1f485e79d07ef4973f6b1b9f88.data.json.gz b/examples/data/imdb_dump/Movies_80662e1f485e79d07ef4973f6b1b9f88.data.json.gz new file mode 100644 index 00000000..b838d29e Binary files /dev/null and b/examples/data/imdb_dump/Movies_80662e1f485e79d07ef4973f6b1b9f88.data.json.gz differ diff --git a/examples/data/imdb_dump/Ratings.structure.json b/examples/data/imdb_dump/Ratings.structure.json new file mode 100644 index 00000000..8571f0d8 --- /dev/null +++ b/examples/data/imdb_dump/Ratings.structure.json @@ -0,0 +1 @@ +{"allInSync":true,"indexes":[],"isReady":true,"parameters":{"cacheEnabled":false,"deleted":false,"distributeShardsLike":"_graphs","globallyUniqueId":"c2728580616/","id":"2728580616","isDisjoint":false,"isSmart":false,"isSmartChild":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional"},"minReplicationFactor":1,"name":"Ratings","numberOfShards":1,"planId":"2728580616","replicationFactor":3,"schema":null,"shardKeys":["_key"],"shardingStrategy":"hash","shards":{"s2728580617":["PRMR-1vqwuhks","PRMR-bvgkeorm","PRMR-zpamyasv"]},"status":3,"type":3,"waitForSync":false,"writeConcern":1},"planVersion":10408} \ No newline at end of file diff --git a/examples/data/imdb_dump/Ratings_e8dcd33ae274522f351c266f028eed7b.data.json.gz 
b/examples/data/imdb_dump/Ratings_e8dcd33ae274522f351c266f028eed7b.data.json.gz new file mode 100644 index 00000000..b604626a Binary files /dev/null and b/examples/data/imdb_dump/Ratings_e8dcd33ae274522f351c266f028eed7b.data.json.gz differ diff --git a/examples/data/imdb_dump/Users.structure.json b/examples/data/imdb_dump/Users.structure.json new file mode 100644 index 00000000..e5420b38 --- /dev/null +++ b/examples/data/imdb_dump/Users.structure.json @@ -0,0 +1 @@ +{"allInSync":true,"indexes":[],"isReady":true,"parameters":{"cacheEnabled":false,"deleted":false,"distributeShardsLike":"_graphs","globallyUniqueId":"c2728580582/","id":"2728580582","isDisjoint":false,"isSmart":false,"isSmartChild":false,"isSystem":false,"keyOptions":{"allowUserKeys":true,"type":"traditional"},"minReplicationFactor":1,"name":"Users","numberOfShards":1,"planId":"2728580582","replicationFactor":3,"schema":null,"shardKeys":["_key"],"shardingStrategy":"hash","shards":{"s2728580583":["PRMR-1vqwuhks","PRMR-bvgkeorm","PRMR-zpamyasv"]},"status":3,"type":2,"waitForSync":false,"writeConcern":1},"planVersion":10405} \ No newline at end of file diff --git a/examples/data/imdb_dump/Users_f9aae5fda8d810a29f12d1e61b4ab25f.data.json.gz b/examples/data/imdb_dump/Users_f9aae5fda8d810a29f12d1e61b4ab25f.data.json.gz new file mode 100644 index 00000000..4eb3a4cd Binary files /dev/null and b/examples/data/imdb_dump/Users_f9aae5fda8d810a29f12d1e61b4ab25f.data.json.gz differ diff --git a/examples/data/imdb_dump/dump.json b/examples/data/imdb_dump/dump.json new file mode 100644 index 00000000..b2a69d99 --- /dev/null +++ b/examples/data/imdb_dump/dump.json @@ -0,0 +1 @@ +{"database":"TUTdit9ohpgz1ntnbetsjstwi","lastTickAtDumpStart":"2732644865","properties":{"id":"2728554641","name":"TUTdit9ohpgz1ntnbetsjstwi","isSystem":false,"sharding":"","replicationFactor":1,"writeConcern":1,"path":""}} \ No newline at end of file diff --git a/examples/oasis.py b/examples/oasis.py deleted file mode 100644 index 
974d7e18..00000000 --- a/examples/oasis.py +++ /dev/null @@ -1,81 +0,0 @@ -import json -import requests -import sys -import time - -from pyArango.connection import * -from arango import ArangoClient - -# retrieving credentials from ArangoDB tutorial service -def getTempCredentials(tutorialName=None,credentialProvider="https://d383fa0b596a.arangodb.cloud:8529/_db/_system/tutorialDB/tutorialDB"): - with open("creds.dat","r+") as cacheFile: - contents = cacheFile.readline() - if len(contents) > 0: - login = None - url = "" - - # check if credentials are still valid - try: - login = json.loads(contents) - url = "https://"+login["hostname"]+":"+str(login["port"]) - except: - # Incorrect data in cred file and retrieve new credentials - cacheFile.truncate(0) - pass - - conn ="" - if (login is not None): - try: - conn = Connection(arangoURL=url, username=login["username"], password=login["password"],) - print("Reusing cached credentials.") - return login - except: - print("Credentials expired.") - pass # Ignore and retrieve new - - # Retrieve new credentials from Foxx Service - print("Requesting new temp credentials.") - if (tutorialName is not None): - body = { - "tutorialName": tutorialName - } - else: - body = "{}" - - url = credentialProvider - x = requests.post(url, data = json.dumps(body)) - - if x.status_code != 200: - print("Error retrieving login data.") - sys.exit() - # Caching credentials - cacheFile.truncate(0) - cacheFile.write(x.text) - print("Temp database ready to use.") - return json.loads(x.text) - -# Connect against an oasis DB and return pyarango connection -def connect(login): - url = "https://"+login["hostname"]+":"+str(login["port"]) - conn = None - try: - conn = Connection(arangoURL=url, username=login["username"], password=login["password"],) - except: - time.sleep(1) - conn = Connection(arangoURL=url, username=login["username"], password=login["password"],) - return conn - -# Connect against an oasis DB and return pyarango connection -def 
connect_python_arango(login): - url = "https://"+login["hostname"]+":"+str(login["port"]) - database = None - # Initialize the ArangoDB client. - client = ArangoClient(hosts=url) - try: - database = client.db(login["dbName"], username=login["username"], password=login["password"]) - except: - time.sleep(1) - database = client.db(login["dbName"], username=login["username"], password=login["password"]) - return database - - \ No newline at end of file