diff --git a/notebooks/demo/nxcg_wikipedia_e2e.ipynb b/notebooks/demo/nxcg_wikipedia_e2e.ipynb deleted file mode 100644 index 872c8860b91..00000000000 --- a/notebooks/demo/nxcg_wikipedia_e2e.ipynb +++ /dev/null @@ -1,219 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# `nx-cugraph` Demo - Wikipedia Pagerank\n", - "\n", - "This notebook demonstrates a zero code change, end-to-end workflow using `cudf.pandas` and `nx-cugraph`." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Uncomment these two lines to enable GPU acceleration\n", - "# The rest of the code stays the same!\n", - "\n", - "# %load_ext cudf.pandas\n", - "# %env NETWORKX_BACKEND_PRIORITY=cugraph\n", - "\n", - "import pandas as pd\n", - "import networkx as nx" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Downloading the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# wget \"https://data.rapids.ai/cugraph/datasets/\" # Use this command to download datasets from the web" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "edgelist_csv = \"enwiki-20240620-edges.csv\"\n", - "nodedata_csv = \"enwiki-20240620-nodeids.csv\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Timed end-to-end code" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read in the Wikipedia Connectivity data from `edgelist_csv`" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "%%time \n", - "edgelist_df = pd.read_csv(\n", - " edgelist_csv,\n", - " sep=\" \",\n", - " names=[\"src\", \"dst\"],\n", - " dtype=\"int32\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read in the Wikipedia pages metadata from `nodedata_csv`" - ] 
- }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "nodedata_df = pd.read_csv(\n", - " nodedata_csv,\n", - " sep=\"\\t\",\n", - " names=[\"nodeid\", \"title\"],\n", - " dtype={\"nodeid\": \"int32\", \"title\": \"str\"},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a NetworkX graph from the connectivity info we just loaded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "G = nx.from_pandas_edgelist(\n", - " edgelist_df,\n", - " source=\"src\",\n", - " target=\"dst\",\n", - " create_using=nx.DiGraph,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run the Pagerank algorithm on the NetworkX graph" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "nx_pr_vals = nx.pagerank(G)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a DataFrame containing the resulting pagerank values for each nodeid" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "pagerank_df = pd.DataFrame({\n", - " \"nodeid\": nx_pr_vals.keys(),\n", - " \"pagerank\": nx_pr_vals.values()\n", - "})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, add the NetworkX results to `nodedata_df` as a new column." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "nodedata_df = nodedata_df.merge(pagerank_df, how=\"left\", on=\"nodeid\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Showing the top 25 pages based on pagerank value" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nodedata_df.sort_values(by=\"pagerank\", ascending=False).head(25)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "devenv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}