diff --git a/.gitignore b/.gitignore
index c6bcf6965d7..358650cfc5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,8 +84,10 @@ datasets/*
 # Jupyter Notebooks
 .ipynb_checkpoints
 
-## Doxygen
+## Doxygen and Docs
 cpp/doxygen/html
+docs/cugraph/lib*
+docs/cugraph/api/*
 
 # created by Dask tests
 python/dask-worker-space
diff --git a/build.sh b/build.sh
index 1723e750978..eef19046d85 100755
--- a/build.sh
+++ b/build.sh
@@ -18,6 +18,8 @@ ARGS=$*
 # script, and that this script resides in the repo dir!
 REPODIR=$(cd $(dirname $0); pwd)
 
+RAPIDS_VERSION=23.12
+
 # Valid args to this script (all possible targets and options) - only one per line
 VALIDARGS="
    clean
@@ -412,8 +414,32 @@ if hasArg docs || hasArg all; then
               ${CMAKE_GENERATOR_OPTION} \
               ${CMAKE_VERBOSE_OPTION}
     fi
+
+    # Download the published doxygen XML tarball of each sibling project, unpack
+    # it into docs/cugraph/<project>, and export that location as
+    # XML_DIR_<PROJECT> (upper-cased project name).
+    for PROJECT in libcugraphops libwholegraph; do
+        XML_DIR="${REPODIR}/docs/cugraph/${PROJECT}"
+        rm -rf "${XML_DIR}"
+        mkdir -p "${XML_DIR}"
+        export XML_DIR_${PROJECT^^}="$XML_DIR"
+
+        echo "downloading xml for ${PROJECT} into ${XML_DIR}. Environment variable XML_DIR_${PROJECT^^} is set to ${XML_DIR}"
+        # -f: fail on HTTP errors instead of saving the error page as xml.tar.gz; -L: follow redirects
+        curl -fL -O "https://d1664dvumjb44w.cloudfront.net/${PROJECT}/xml_tar/${RAPIDS_VERSION}/xml.tar.gz"
+        tar -xzf xml.tar.gz -C "${XML_DIR}"
+        rm "./xml.tar.gz"
+    done
+
     cd ${LIBCUGRAPH_BUILD_DIR}
     cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG}
+
+    echo "making libcugraph doc dir"
+    rm -rf "${REPODIR}/docs/cugraph/libcugraph"
+    mkdir -p "${REPODIR}/docs/cugraph/libcugraph"
+
+    export XML_DIR_LIBCUGRAPH="${REPODIR}/cpp/doxygen/xml"
+
     cd ${REPODIR}/docs/cugraph
     make html
 fi
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 3f97f652d41..3f765704bdb 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -29,7 +29,9 @@ rapids-mamba-retry install \
   cugraph-pyg \
   cugraph-service-server \
   cugraph-service-client \
-  libcugraph_etl
+  libcugraph_etl \
+  pylibcugraphops \
+  pylibwholegraph
 
 # This command installs `cugraph-dgl` without its dependencies
 # since this package can currently only run in `11.6` CTK environments
@@ -50,8 +52,7 @@ done
 rapids-logger "Build CPP docs"
 pushd cpp/doxygen
 doxygen Doxyfile
-mkdir -p "${RAPIDS_DOCS_DIR}/libcugraph/html"
-mv html/* "${RAPIDS_DOCS_DIR}/libcugraph/html"
+export XML_DIR_LIBCUGRAPH="$(pwd)/xml"
 popd
 
 rapids-logger "Build Python docs"
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 69eb085e7ed..c091bd1ed33 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -54,6 +54,10 @@ sed_runner "s/set(cugraph_version .*)/set(cugraph_version ${NEXT_FULL_TAG})/g" p
 sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cugraph/source/conf.py
 sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cugraph/source/conf.py
 
+
+# build.sh script: rewrite only the top-level RAPIDS_VERSION=... assignment (anchored at line start)
+sed_runner 's/^RAPIDS_VERSION=.*/RAPIDS_VERSION='"${NEXT_SHORT_TAG}"'/g' build.sh
+
 # Centralized version file update
 # NOTE: Any script that runs in CI will need to use gha-tool `rapids-generate-version`
 # and echo it to `VERSION` file to get an alpha spec of the current version
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 3e867643041..836d5569ef7 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -192,6 +192,7 @@ set(CUGRAPH_SOURCES
     src/detail/shuffle_vertex_pairs.cu
     src/detail/collect_local_vertex_values.cu
     src/detail/groupby_and_count.cu
+    src/detail/collect_comm_wrapper.cu
     src/sampling/random_walks_mg.cu
     src/community/detail/common_methods_mg.cu
     src/community/detail/common_methods_sg.cu
@@ -201,6 +202,8 @@ set(CUGRAPH_SOURCES
     src/community/detail/mis_mg.cu
     src/detail/utility_wrappers.cu
     src/structure/graph_view_mg.cu
+    src/structure/remove_self_loops.cu
+    src/structure/remove_multi_edges.cu
     src/utilities/path_retrieval.cu
     src/structure/legacy/graph.cu
     src/linear_assignment/legacy/hungarian.cu
@@ -443,6 +446,7 @@ add_library(cugraph_c
         src/c_api/labeling_result.cpp
         src/c_api/weakly_connected_components.cpp
         src/c_api/strongly_connected_components.cpp
+        src/c_api/allgather.cpp
         src/c_api/legacy_k_truss.cpp
         )
 add_library(cugraph::cugraph_c ALIAS cugraph_c)
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 482ff988098..6946bd38bfe 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.20
+# Doxyfile 1.9.8
 
 # This file describes the settings to be used by the documentation system
 # doxygen (www.doxygen.org) for a project.
@@ -12,6 +12,16 @@
 # For lists, items can also be appended using:
 # TAG += value [value, ...]
 # Values that contain spaces should be placed between quotes (\" \").
+#
+# Note:
+#
+# Use doxygen to compare the used configuration file with the template
+# configuration file:
+# doxygen -x [configFile]
+# Use doxygen to compare the used configuration file with the template
+# configuration file without replacing the environment variables or CMake type
+# replacement variables:
+# doxygen -x_noenv [configFile]
 
 #---------------------------------------------------------------------------
 # Project related configuration options
@@ -32,19 +42,19 @@ DOXYFILE_ENCODING      = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.
 
-PROJECT_NAME           = "libcugraph"
+PROJECT_NAME           = libcugraph
 
 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER=23.12
+PROJECT_NUMBER         = 23.12
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
 # quick idea about the purpose of the project. Keep the description short.
 
-PROJECT_BRIEF          = GPU accelerated graph analytics
+PROJECT_BRIEF          = "GPU accelerated graph analytics"
 
 # With the PROJECT_LOGO tag one can specify a logo or an icon that is included
 # in the documentation. The maximum height of the logo should not exceed 55
@@ -60,16 +70,28 @@ PROJECT_LOGO           =
 
 OUTPUT_DIRECTORY       =
 
-# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096
+# sub-directories (in 2 levels) under the output directory of each output format
+# and will distribute the generated files over these directories. Enabling this
 # option can be useful when feeding doxygen a huge amount of source files, where
 # putting all generated files in the same directory would otherwise causes
-# performance problems for the file system.
+# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to
+# control the number of sub-directories.
 # The default value is: NO.
 
 CREATE_SUBDIRS         = NO
 
+# Controls the number of sub-directories that will be created when
+# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every
+# level increment doubles the number of directories, resulting in 4096
+# directories at level 8 which is the default and also the maximum value. The
+# sub-directories are organized in 2 levels, the first level always has a fixed
+# number of 16 directories.
+# Minimum value: 0, maximum value: 8, default value: 8.
+# This tag requires that the tag CREATE_SUBDIRS is set to YES.
+
+CREATE_SUBDIRS_LEVEL   = 8
+
 # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
 # characters to appear in the names of generated files. If set to NO, non-ASCII
 # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
@@ -81,26 +103,18 @@ ALLOW_UNICODE_NAMES    = NO
 # The OUTPUT_LANGUAGE tag is used to specify the language in which all
 # documentation generated by doxygen is written. Doxygen will use this
 # information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
-# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
-# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
-# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
-# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
-# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
-# Ukrainian and Vietnamese.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian,
+# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English
+# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek,
+# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with
+# English messages), Korean, Korean-en (Korean with English messages), Latvian,
+# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese,
+# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish,
+# Swedish, Turkish, Ukrainian and Vietnamese.
 # The default value is: English.
 
 OUTPUT_LANGUAGE        = English
 
-# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all generated output in the proper direction.
-# Possible values are: None, LTR, RTL and Context.
-# The default value is: None.
-
-OUTPUT_TEXT_DIRECTION  = None
-
 # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
 # descriptions after the members that are listed in the file and class
 # documentation (similar to Javadoc). Set to NO to disable this.
@@ -248,16 +262,16 @@ TAB_SIZE               = 4
 # the documentation. An alias has the form:
 # name=value
 # For example adding
-# "sideeffect=@par Side Effects:\n"
+# "sideeffect=@par Side Effects:^^"
 # will allow you to put the command \sideeffect (or @sideeffect) in the
 # documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines (in the resulting output). You can put ^^ in the value part of an
-# alias to insert a newline as if a physical newline was in the original file.
-# When you need a literal { or } or , in the value part of an alias you have to
-# escape them by means of a backslash (\), this can lead to conflicts with the
-# commands \{ and \} for these it is advised to use the version @{ and @} or use
-# a double escape (\\{ and \\})
+# "Side Effects:". Note that you cannot put \n's in the value part of an alias
+# to insert newlines (in the resulting output). You can put ^^ in the value part
+# of an alias to insert a newline as if a physical newline was in the original
+# file. When you need a literal { or } or , in the value part of an alias you
+# have to escape them by means of a backslash (\), this can lead to conflicts
+# with the commands \{ and \} for these it is advised to use the version @{ and
+# @} or use a double escape (\\{ and \\})
 
 ALIASES                =
 
@@ -302,8 +316,8 @@ OPTIMIZE_OUTPUT_SLICE  = NO
 # extension. Doxygen has a built-in mapping, but you can override or extend it
 # using this tag. The format is ext=language, where ext is a file extension, and
 # language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
-# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,
-# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice,
+# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
 # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser
 # tries to guess whether the code is fixed or free formatted code, this is the
 # default for Fortran type files). For instance to make doxygen treat .inc files
@@ -313,7 +327,10 @@ OPTIMIZE_OUTPUT_SLICE  = NO
 # Note: For files without extension you can use no_extension as a placeholder.
 #
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note: see also the list of default file extension mappings.
 
 EXTENSION_MAPPING      = cu=C++ \
                          cuh=C++
@@ -337,6 +354,17 @@ MARKDOWN_SUPPORT       = YES
 
 TOC_INCLUDE_HEADINGS   = 5
 
+# The MARKDOWN_ID_STYLE tag can be used to specify the algorithm used to
+# generate identifiers for the Markdown headings. Note: Every identifier is
+# unique.
+# Possible values are: DOXYGEN use a fixed 'autotoc_md' string followed by a
+# sequence number starting at 0 and GITHUB use the lower case version of title
+# with any whitespace replaced by '-' and punctuation characters removed.
+# The default value is: DOXYGEN.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+MARKDOWN_ID_STYLE      = DOXYGEN
+
 # When enabled doxygen tries to link words that correspond to documented
 # classes, or namespaces to their corresponding documentation. Such a link can
 # be prevented in individual cases by putting a % sign in front of the word or
@@ -448,19 +476,27 @@ TYPEDEF_HIDES_STRUCT   = NO
 
 LOOKUP_CACHE_SIZE      = 0
 
-# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use
+# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
 # during processing. When set to 0 doxygen will based this on the number of
 # cores available in the system. You can set it explicitly to a value larger
 # than 0 to get more control over the balance between CPU load and processing
 # speed. At this moment only the input processing can be done using multiple
 # threads. Since this is still an experimental feature the default is set to 1,
-# which efficively disables parallel processing. Please report any issues you
+# which effectively disables parallel processing. Please report any issues you
 # encounter. Generating dot graphs in parallel is controlled by the
 # DOT_NUM_THREADS setting.
 # Minimum value: 0, maximum value: 32, default value: 1.
 
 NUM_PROC_THREADS       = 1
 
+# If the TIMESTAMP tag is set different from NO then each generated page will
+# contain the date or date and time when the page was generated. Setting this to
+# NO can help when comparing the output of multiple runs.
+# Possible values are: YES, NO, DATETIME and DATE.
+# The default value is: NO.
+
+TIMESTAMP              = NO
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
@@ -524,6 +560,13 @@ EXTRACT_LOCAL_METHODS  = NO
 
 EXTRACT_ANON_NSPACES   = NO
 
+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
 # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
 # undocumented members inside documented classes or files. If set to NO these
 # members will be included in the various overviews, but no documentation
@@ -535,7 +578,8 @@ HIDE_UNDOC_MEMBERS     = NO
 # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
 # undocumented classes that are normally visible in the class hierarchy. If set
 # to NO, these classes will be included in the various overviews. This option
-# has no effect if EXTRACT_ALL is enabled.
+# will also hide undocumented C++ concepts if enabled. This option has no effect
+# if EXTRACT_ALL is enabled.
 # The default value is: NO.
 
 HIDE_UNDOC_CLASSES     = NO
@@ -561,12 +605,20 @@ HIDE_IN_BODY_DOCS      = NO
 
 INTERNAL_DOCS          = NO
 
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# (including Cygwin) and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
+# Possible values are: SYSTEM, NO and YES.
+# The default value is: SYSTEM.
 
 CASE_SENSE_NAMES       = YES
 
@@ -584,6 +636,12 @@ HIDE_SCOPE_NAMES       = NO
 
 HIDE_COMPOUND_REFERENCE= NO
 
+# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class
+# will show which file needs to be included to use the class.
+# The default value is: YES.
+
+SHOW_HEADERFILE        = YES
+
 # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
 # the files that are included by a file in the documentation of that file.
 # The default value is: YES.
@@ -741,7 +799,8 @@ FILE_VERSION_FILTER    =
 # output files in an output format independent way. To create the layout file
 # that represents doxygen's defaults, run doxygen with the -l option. You can
 # optionally specify a file name after the option, if omitted DoxygenLayout.xml
-# will be used as the name of the layout file.
+# will be used as the name of the layout file. See also section "Changing the
+# layout of pages" for information.
 #
 # Note that if you run doxygen from a directory containing a file called
 # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
@@ -787,24 +846,50 @@ WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = YES
 
 # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some parameters
-# in a documented function, or documenting parameters that don't exist or using
-# markup commands wrongly.
+# potential errors in the documentation, such as documenting some parameters in
+# a documented function twice, or documenting parameters that don't exist or
+# using markup commands wrongly.
 # The default value is: YES.
 
 WARN_IF_DOC_ERROR      = YES
 
+# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete
+# function parameter documentation. If set to NO, doxygen will accept that some
+# parameters have no documentation without warning.
+# The default value is: YES.
+
+WARN_IF_INCOMPLETE_DOC = YES
+
 # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
 # are documented, but have no documentation for their parameters or return
-# value. If set to NO, doxygen will only warn about wrong or incomplete
-# parameter documentation, but not about the absence of documentation. If
-# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
+# value. If set to NO, doxygen will only warn about wrong parameter
+# documentation, but not about the absence of documentation. If EXTRACT_ALL is
+# set to YES then this flag will automatically be disabled. See also
+# WARN_IF_INCOMPLETE_DOC
 # The default value is: NO.
 
 WARN_NO_PARAMDOC       = YES
 
+# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about
+# undocumented enumeration values. If set to NO, doxygen will accept
+# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: NO.
+
+WARN_IF_UNDOC_ENUM_VAL = NO
+
 # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS_PRINT then doxygen behaves
+# like FAIL_ON_WARNINGS, but if no WARN_LOGFILE is defined doxygen will not
+# write the warning messages in between other messages but write them at the end
+# of a run. If a WARN_LOGFILE is defined, the warning messages will, besides
+# being written to the defined file, also be shown at the end of a run, unless
+# the WARN_LOGFILE is defined as - (i.e. standard output, stdout); in that case
+# the behavior will remain as with the setting FAIL_ON_WARNINGS.
+# Possible values are: NO, YES, FAIL_ON_WARNINGS and FAIL_ON_WARNINGS_PRINT.
 # The default value is: NO.
 
 WARN_AS_ERROR          = NO
@@ -815,13 +900,27 @@ WARN_AS_ERROR          = NO
 # and the warning text. Optionally the format may contain $version, which will
 # be replaced by the version of the file (if it could be obtained via
 # FILE_VERSION_FILTER)
+# See also: WARN_LINE_FORMAT
 # The default value is: $file:$line: $text.
 
 WARN_FORMAT            = "$file:$line: $text"
 
+# In the $text part of the WARN_FORMAT command it is possible that a reference
+# to a more specific place is given. To make it easier to jump to this place
+# (outside of doxygen) the user can define a custom "cut" / "paste" string.
+# Example:
+# WARN_LINE_FORMAT = "'vi $file +$line'"
+# See also: WARN_FORMAT
+# The default value is: at line $line of file $file.
+
+WARN_LINE_FORMAT       = "at line $line of file $file"
+
 # The WARN_LOGFILE tag can be used to specify a file to which warning and error
 # messages should be written. If left blank the output is written to standard
-# error (stderr).
+# error (stderr). In case the file specified cannot be opened for writing the
+# warning and error messages are written to standard error. When - is specified
+# as the file, the warning and error messages are written to standard output
+# (stdout).
 
 WARN_LOGFILE           =
 
@@ -842,12 +941,23 @@ INPUT                  = main_page.md \
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
+# See also: INPUT_FILE_ENCODING
 # The default value is: UTF-8.
 
 INPUT_ENCODING         = UTF-8
 
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. The INPUT_FILE_ENCODING tag can be used to specify
+# character encoding on a per file pattern basis. Doxygen will compare the file
+# name with each pattern and apply the encoding (instead of the default
+# INPUT_ENCODING) if there is a match. The character encodings are a list of the
+# form: pattern=encoding (like *.php=ISO-8859-1). See the INPUT_ENCODING tag
+# for further information on supported encodings.
+
+INPUT_FILE_ENCODING    =
+
 # If the value of the INPUT tag contains directories, you can use the
 # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
 # *.h) to filter out the source-files in the directories.
@@ -856,13 +966,15 @@ INPUT_ENCODING         = UTF-8
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # read by doxygen.
 #
-# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
-# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
-# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
-# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
-# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
-# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
-# *.vhdl, *.ucf, *.qsf and *.ice.
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
+# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cxxm,
+# *.cpp, *.cppm, *.c++, *.c++m, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl,
+# *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, *.h++, *.ixx, *.l, *.cs, *.d, *.php,
+# *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be
+# provided as doxygen C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
+# *.f18, *.f, *.for, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice.
 
 FILE_PATTERNS          = *.cpp \
                          *.hpp \
@@ -907,10 +1019,7 @@ EXCLUDE_PATTERNS       = */nvtx/* \
 # (namespaces, classes, functions, etc.) that should be excluded from the
 # output. The symbol name can be a fully qualified name, a word, or if the
 # wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories use the pattern */test/*
+# ANamespace::AClass, ANamespace::*Test
 
 EXCLUDE_SYMBOLS        = org::apache
 
@@ -955,6 +1064,11 @@ IMAGE_PATH             =
 # code is scanned, but not when the output code is generated. If lines are added
 # or removed, the anchors will not be placed correctly.
 #
+# Note that doxygen will use the data processed and written to standard output
+# for further processing, therefore nothing else, like debug statements or used
+# commands (so in case of a Windows batch file always use @echo OFF), should be
+# written to standard output.
+#
 # Note that for custom extensions or not directly supported extensions you also
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # properly processed by doxygen.
@@ -996,6 +1110,15 @@ FILTER_SOURCE_PATTERNS =
 
 USE_MDFILE_AS_MAINPAGE = main_page.md
 
+# The Fortran standard specifies that for fixed formatted Fortran code all
+# characters from position 72 are to be considered as comment. A common
+# extension is to allow longer lines before the automatic comment starts. The
+# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can
+# be processed before the automatic comment starts.
+# Minimum value: 7, maximum value: 10000, default value: 72.
+
+FORTRAN_COMMENT_AFTER  = 72
+
 #---------------------------------------------------------------------------
 # Configuration options related to source browsing
 #---------------------------------------------------------------------------
@@ -1093,17 +1216,11 @@ VERBATIM_HEADERS       = YES
 
 ALPHABETICAL_INDEX     = YES
 
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
-# In case all classes in a project start with a common prefix, all classes will
-# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
-# can be used to specify a prefix (or a list of prefixes) that should be ignored
-# while generating the index headers.
+# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes)
+# that should be ignored while generating the index headers. The IGNORE_PREFIX
+# tag works for classes, function and member names. The entity will be placed in
+# the alphabetical list under the first letter of the entity name that remains
+# after removing the prefix.
 # This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
 
 IGNORE_PREFIX          =
@@ -1115,7 +1232,7 @@ IGNORE_PREFIX          =
 # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
 # The default value is: YES.
 
-GENERATE_HTML          = YES
+GENERATE_HTML          = NO
 
 # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
 # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
@@ -1182,7 +1299,12 @@ HTML_STYLESHEET        =
 # Doxygen will copy the style sheet files to the output directory.
 # Note: The order of the extra style sheet files is of importance (e.g. the last
 # style sheet in the list overrules the setting of the previous ones in the
-# list). For an example see the documentation.
+# list).
+# Note: Since the styling of scrollbars can currently not be overruled in
+# Webkit/Chromium, the styling will be left out of the default doxygen.css if
+# one or more extra stylesheets have been specified. So if scrollbar
+# customization is desired it has to be added explicitly. For an example see the
+# documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 HTML_EXTRA_STYLESHEET  =
@@ -1197,9 +1319,22 @@ HTML_EXTRA_STYLESHEET  =
 
 HTML_EXTRA_FILES       =
 
+# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
+# should be rendered with a dark or light theme.
+# Possible values are: LIGHT always generate light mode output, DARK always
+# generate dark mode output, AUTO_LIGHT automatically set the mode according to
+# the user preference, use light mode if no preference is set (the default),
+# AUTO_DARK automatically set the mode according to the user preference, use
+# dark mode if no preference is set and TOGGLE allows the user to switch between
+# light and dark mode via a button.
+# The default value is: AUTO_LIGHT.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE        = AUTO_LIGHT
+
 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
 # will adjust the colors in the style sheet and background images according to
-# this color. Hue is specified as an angle on a colorwheel, see
+# this color. Hue is specified as an angle on a color-wheel, see
 # https://en.wikipedia.org/wiki/Hue for more information. For instance the value
 # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
 # purple, and 360 is red again.
@@ -1209,7 +1344,7 @@ HTML_EXTRA_FILES       =
 HTML_COLORSTYLE_HUE    = 270
 
 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
-# in the HTML output. For a value of 0 the output will use grayscales only. A
+# in the HTML output. For a value of 0 the output will use gray-scales only. A
 # value of 255 will produce the most vivid colors.
 # Minimum value: 0, maximum value: 255, default value: 100.
 # This tag requires that the tag GENERATE_HTML is set to YES.
@@ -1227,15 +1362,6 @@ HTML_COLORSTYLE_SAT    = 255
 
 HTML_COLORSTYLE_GAMMA  = 80
 
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to YES can help to show when doxygen was last run and thus if the
-# documentation is up to date.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP         = NO
-
 # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
 # documentation will contain a main index with vertical navigation menus that
 # are dynamically created via JavaScript. If disabled, the navigation index will
@@ -1255,6 +1381,13 @@ HTML_DYNAMIC_MENUS     = YES
 
 HTML_DYNAMIC_SECTIONS  = NO
 
+# If the HTML_CODE_FOLDING tag is set to YES then classes and functions can be
+# dynamically folded and expanded in the generated HTML source code.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_CODE_FOLDING      = YES
+
 # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
 # shown in the various tree structured indices initially; the user can expand
 # and collapse entries dynamically later on. Doxygen will expand the tree to
@@ -1270,10 +1403,11 @@ HTML_INDEX_NUM_ENTRIES = 100
 
 # If the GENERATE_DOCSET tag is set to YES, additional index files will be
 # generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: https://developer.apple.com/xcode/), introduced with OSX
-# 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
 # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
 # genXcode/_index.html for more information.
@@ -1290,6 +1424,13 @@ GENERATE_DOCSET        = NO
 
 DOCSET_FEEDNAME        = "Doxygen generated docs"
 
+# This tag determines the URL of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDURL         =
+
 # This tag specifies a string that should uniquely identify the documentation
 # set bundle. This should be a reverse domain-name style string, e.g.
 # com.mycompany.MyDocSet. Doxygen will append .docset to the name.
@@ -1315,8 +1456,12 @@ DOCSET_PUBLISHER_NAME  = Publisher
 # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
 # additional HTML index files: index.hhp, index.hhc, and index.hhk. The
 # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# on Windows. At the beginning of 2021 Microsoft took the original page, with
+# among others the download links, offline (the HTML help workshop had already
+# been in maintenance mode for many years). You can download the HTML help
+# workshop installation executable from the web archives (see:
+# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo
+# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe).
 #
 # The HTML Help Workshop contains a compiler that can convert all HTML output
 # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1373,6 +1518,16 @@ BINARY_TOC             = NO
 
 TOC_EXPAND             = NO
 
+# The SITEMAP_URL tag is used to specify the full URL of the place where the
+# generated documentation will be placed on the server by the user during the
+# deployment of the documentation. The generated sitemap is called sitemap.xml
+# and placed on the directory specified by HTML_OUTPUT. In case no SITEMAP_URL
+# is specified no sitemap is generated. For information about the sitemap
+# protocol see https://www.sitemaps.org
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SITEMAP_URL            =
+
 # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
 # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
 # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
@@ -1391,7 +1546,8 @@ QCH_FILE               =
 
 # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
 # Project output. For more information please see Qt Help Project / Namespace
-# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1399,8 +1555,8 @@ QHP_NAMESPACE          = org.doxygen.Project
 
 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
 # The default value is: doc.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1408,16 +1564,16 @@ QHP_VIRTUAL_FOLDER     = doc
 
 # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
 # filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHP_CUST_FILTER_NAME   =
 
 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
 # custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHP_CUST_FILTER_ATTRS  =
@@ -1429,9 +1585,9 @@ QHP_CUST_FILTER_ATTRS  =
 
 QHP_SECT_FILTER_ATTRS  =
 
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHG_LOCATION           =
@@ -1474,16 +1630,28 @@ DISABLE_INDEX          = NO
 # to work a browser that supports JavaScript, DHTML, CSS and frames is required
 # (i.e. any modern browser). Windows users are probably better off using the
 # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
-# further fine-tune the look of the index. As an example, the default style
-# sheet generated by doxygen has an example that shows how to put an image at
-# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
-# the same information as the tab index, you could consider setting
-# DISABLE_INDEX to YES when enabling this option.
+# further fine-tune the look of the index (see "Fine-tuning the output"). As an
+# example, the default style sheet generated by doxygen has an example that
+# shows how to put an image at the root of the tree instead of the PROJECT_NAME.
+# Since the tree basically has the same information as the tab index, you could
+# consider setting DISABLE_INDEX to YES when enabling this option.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 GENERATE_TREEVIEW      = NO
 
+# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the
+# FULL_SIDEBAR option determines if the side bar is limited to only the treeview
+# area (value NO) or if it should extend to the full height of the window (value
+# YES). Setting this to YES gives a layout similar to
+# https://docs.readthedocs.io with more room for contents, but less room for the
+# project logo, title, and description. If either GENERATE_TREEVIEW or
+# DISABLE_INDEX is set to NO, this option has no effect.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FULL_SIDEBAR           = NO
+
 # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
 # doxygen will group on one line in the generated HTML documentation.
 #
@@ -1508,6 +1676,13 @@ TREEVIEW_WIDTH         = 250
 
 EXT_LINKS_IN_WINDOW    = NO
 
+# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email
+# addresses.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+OBFUSCATE_EMAILS       = YES
+
 # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
 # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
 # https://inkscape.org) to generate formulas as SVG images instead of PNGs for
@@ -1528,17 +1703,6 @@ HTML_FORMULA_FORMAT    = png
 
 FORMULA_FONTSIZE       = 10
 
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes have effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_TRANSPARENT    = YES
-
 # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
 # to create new LaTeX commands to be used in formulas as building blocks. See
 # the section "Including formulas" for details.
@@ -1556,11 +1720,29 @@ FORMULA_MACROFILE      =
 
 USE_MATHJAX            = NO
 
+# With MATHJAX_VERSION it is possible to specify the MathJax version to be used.
+# Note that the different versions of MathJax have different requirements with
+# regards to the different settings, so it is possible that also other MathJax
+# settings have to be changed when switching between the different MathJax
+# versions.
+# Possible values are: MathJax_2 and MathJax_3.
+# The default value is: MathJax_2.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_VERSION        = MathJax_2
+
 # When MathJax is enabled you can set the default output format to be used for
-# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# the MathJax output. For more details about the output format see MathJax
+# version 2 (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3
+# (see:
+# http://docs.mathjax.org/en/latest/web/components/output.html).
 # Possible values are: HTML-CSS (which is slower, but has the best
-# compatibility), NativeMML (i.e. MathML) and SVG.
+# compatibility. This is the name for MathJax version 2, for MathJax version 3
+# this will be translated into chtml), NativeMML (i.e. MathML. Only supported
+# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This
+# is the name for MathJax version 3, for MathJax version 2 this will be
+# translated into HTML-CSS) and SVG.
 # The default value is: HTML-CSS.
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
@@ -1573,22 +1755,29 @@ MATHJAX_FORMAT         = HTML-CSS
 # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
 # Content Delivery Network so you can quickly see the result without installing
 # MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from https://www.mathjax.org before deployment.
-# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
+# MathJax from https://www.mathjax.org before deployment. The default value is:
+# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2
+# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
 MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
 
 # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
 # extension names that should be enabled during MathJax rendering. For example
+# for MathJax version 2 (see
+# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions):
 # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# For example for MathJax version 3 (see
+# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html):
+# MATHJAX_EXTENSIONS = ams
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
 MATHJAX_EXTENSIONS     =
 
 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
 # example see the documentation.
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
@@ -1635,7 +1824,8 @@ SERVER_BASED_SEARCH    = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
 #
 # See the section "External Indexing and Searching" for details.
 # The default value is: NO.
@@ -1648,8 +1838,9 @@ EXTERNAL_SEARCH        = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
 # This tag requires that the tag SEARCHENGINE is set to YES.
 
 SEARCHENGINE_URL       =
@@ -1758,29 +1949,31 @@ PAPER_TYPE             = a4
 
 EXTRA_PACKAGES         =
 
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
-# generated LaTeX document. The header should contain everything until the first
-# chapter. If it is left blank doxygen will generate a standard header. See
-# section "Doxygen usage" for information on how to let doxygen write the
-# default header to a separate file.
+# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for
+# the generated LaTeX document. The header should contain everything until the
+# first chapter. If it is left blank doxygen will generate a standard header. It
+# is highly recommended to start with a default header using
+# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty
+# and then modify the file new_header.tex. See also section "Doxygen usage" for
+# information on how to generate the default header that doxygen normally uses.
 #
-# Note: Only use a user-defined header if you know what you are doing! The
-# following commands have a special meaning inside the header: $title,
-# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
-# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
-# string, for the replacement values of the other commands the user is referred
-# to HTML_HEADER.
+# Note: Only use a user-defined header if you know what you are doing!
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. Certain markers
+# and block names have a special meaning inside the header (and footer); for a
+# description of the possible markers and block names see the documentation.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_HEADER           =
 
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
-# generated LaTeX document. The footer should contain everything after the last
-# chapter. If it is left blank doxygen will generate a standard footer. See
+# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for
+# the generated LaTeX document. The footer should contain everything after the
+# last chapter. If it is left blank doxygen will generate a standard footer. See
 # LATEX_HEADER for more information on how to generate a default footer and what
-# special commands can be used inside the footer.
-#
-# Note: Only use a user-defined footer if you know what you are doing!
+# special commands can be used inside the footer. See also section "Doxygen
+# usage" for information on how to generate the default footer that doxygen
+# normally uses. Note: Only use a user-defined footer if you know what you are
+# doing!
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_FOOTER           =
@@ -1823,10 +2016,16 @@ PDF_HYPERLINKS         = YES
 
 USE_PDFLATEX           = YES
 
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep running
-# if errors occur, instead of asking the user for help. This option is also used
-# when generating formulas in HTML.
+# The LATEX_BATCHMODE tag signals the behavior of LaTeX in case of an error.
+# Possible values are: NO same as ERROR_STOP, YES same as BATCH, BATCH In batch
+# mode nothing is printed on the terminal, errors are scrolled as if <return> is
+# hit at every error; missing files that TeX tries to input or request from
+# keyboard input (\read on a not open input stream) cause the job to abort,
+# NON_STOP In nonstop mode the diagnostic message will appear on the terminal,
+# but there is no possibility of user interaction just like in batch mode,
+# SCROLL In scroll mode, TeX will stop only for missing files to input or if
+# keyboard input is necessary and ERROR_STOP In errorstop mode, TeX will stop at
+# each error, asking for user intervention.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
@@ -1839,16 +2038,6 @@ LATEX_BATCHMODE        = NO
 
 LATEX_HIDE_INDICES     = NO
 
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE      = NO
-
 # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
 # bibliography, e.g. plainnat, or ieeetr. See
 # https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
@@ -1857,14 +2046,6 @@ LATEX_SOURCE_CODE      = NO
 
 LATEX_BIB_STYLE        = plain
 
-# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
-# page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_TIMESTAMP        = NO
-
 # The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
 # path from which the emoji images will be read. If a relative path is entered,
 # it will be relative to the LATEX_OUTPUT directory. If left blank the
@@ -1929,16 +2110,6 @@ RTF_STYLESHEET_FILE    =
 
 RTF_EXTENSIONS_FILE    =
 
-# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
-# with syntax highlighting in the RTF output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_SOURCE_CODE        = NO
-
 #---------------------------------------------------------------------------
 # Configuration options related to the man page output
 #---------------------------------------------------------------------------
@@ -1991,7 +2162,7 @@ MAN_LINKS              = NO
 # captures the structure of the code including all documentation.
 # The default value is: NO.
 
-GENERATE_XML           = NO
+GENERATE_XML           = YES
 
 # The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
 # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
@@ -2035,27 +2206,44 @@ GENERATE_DOCBOOK       = NO
 
 DOCBOOK_OUTPUT         = docbook
 
-# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
-# program listings (including syntax highlighting and cross-referencing
-# information) to the DOCBOOK output. Note that enabling this will significantly
-# increase the size of the DOCBOOK output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_PROGRAMLISTING = NO
-
 #---------------------------------------------------------------------------
 # Configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 
 # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
-# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# AutoGen Definitions (see https://autogen.sourceforge.net/) file that captures
 # the structure of the code including all documentation. Note that this feature
 # is still experimental and incomplete at the moment.
 # The default value is: NO.
 
 GENERATE_AUTOGEN_DEF   = NO
 
+#---------------------------------------------------------------------------
+# Configuration options related to Sqlite3 output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_SQLITE3 tag is set to YES doxygen will generate a Sqlite3
+# database with symbols found by doxygen stored in tables.
+# The default value is: NO.
+
+GENERATE_SQLITE3       = NO
+
+# The SQLITE3_OUTPUT tag is used to specify where the Sqlite3 database will be
+# put. If a relative path is entered the value of OUTPUT_DIRECTORY will be put
+# in front of it.
+# The default directory is: sqlite3.
+# This tag requires that the tag GENERATE_SQLITE3 is set to YES.
+
+SQLITE3_OUTPUT         = sqlite3
+
+# If the SQLITE3_RECREATE_DB tag is set to YES, the existing doxygen_sqlite3.db
+# database file will be recreated with each doxygen run. If set to NO, doxygen
+# will warn if a database file is already found and will not modify it.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_SQLITE3 is set to YES.
+
+SQLITE3_RECREATE_DB    = YES
+
 #---------------------------------------------------------------------------
 # Configuration options related to the Perl module output
 #---------------------------------------------------------------------------
@@ -2130,7 +2318,8 @@ SEARCH_INCLUDES        = YES
 
 # The INCLUDE_PATH tag can be used to specify one or more directories that
 # contain include files that are not input files but should be processed by the
-# preprocessor.
+# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of
+# RECURSIVE has no effect here.
 # This tag requires that the tag SEARCH_INCLUDES is set to YES.
 
 INCLUDE_PATH           =
@@ -2197,15 +2386,15 @@ TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/22.08
 
 GENERATE_TAGFILE       =
 
-# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
-# the class index. If set to NO, only the inherited external classes will be
-# listed.
+# If the ALLEXTERNALS tag is set to YES, all external classes and namespaces
+# will be listed in the class and namespace index. If set to NO, only the
+# inherited external classes will be listed.
 # The default value is: NO.
 
 ALLEXTERNALS           = NO
 
 # If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will be
+# in the topic index. If set to NO, only the current project's groups will be
 # listed.
 # The default value is: YES.
 
@@ -2219,25 +2408,9 @@ EXTERNAL_GROUPS        = YES
 EXTERNAL_PAGES         = YES
 
 #---------------------------------------------------------------------------
-# Configuration options related to the dot tool
+# Configuration options related to diagram generator tools
 #---------------------------------------------------------------------------
 
-# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS         = YES
-
-# You can include diagrams made with dia in doxygen documentation. Doxygen will
-# then run dia to produce the diagram and insert it in the documentation. The
-# DIA_PATH tag allows you to specify the directory where the dia binary resides.
-# If left empty dia is assumed to be found in the default search path.
-
-DIA_PATH               =
-
 # If set to YES the inheritance and collaboration graphs will hide inheritance
 # and usage relations if the target is undocumented or is not a class.
 # The default value is: YES.
@@ -2246,7 +2419,7 @@ HIDE_UNDOC_RELATIONS   = YES
 
 # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
 # available from the path. This tool is part of Graphviz (see:
-# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# https://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
 # Bell Labs. The other options in this section have no effect if this option is
 # set to NO
 # The default value is: NO.
@@ -2263,49 +2436,73 @@ HAVE_DOT               = YES
 
 DOT_NUM_THREADS        = 0
 
-# When you want a differently looking font in the dot files that doxygen
-# generates you can specify the font name using DOT_FONTNAME. You need to make
-# sure dot is able to find the font, which can be done by putting it in a
-# standard location or by setting the DOTFONTPATH environment variable or by
-# setting DOT_FONTPATH to the directory containing the font.
-# The default value is: Helvetica.
+# DOT_COMMON_ATTR specifies common attributes for nodes, edges and labels of
+# subgraphs. When you want a different looking font in the dot files that
+# doxygen generates you can specify fontname, fontcolor and fontsize attributes.
+# For details please see <a href=https://graphviz.org/doc/info/attrs.html>Node,
+# Edge and Graph Attributes specification</a>. You need to make sure dot is able
+# to find the font, which can be done by putting it in a standard location or by
+# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font. Default graphviz fontsize is 14.
+# The default value is: fontname=Helvetica,fontsize=10.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_FONTNAME           = Helvetica
+DOT_COMMON_ATTR        = "fontname=Helvetica,fontsize=10"
 
-# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
-# dot graphs.
-# Minimum value: 4, maximum value: 24, default value: 10.
+# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can
+# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. <a
+# href=https://graphviz.org/doc/info/arrows.html>Complete documentation about
+# arrows shapes.</a>
+# The default value is: labelfontname=Helvetica,labelfontsize=10.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_FONTSIZE           = 10
+DOT_EDGE_ATTR          = "labelfontname=Helvetica,labelfontsize=10"
 
-# By default doxygen will tell dot to use the default font as specified with
-# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
-# the path where dot can find it using this tag.
+# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes
+# around nodes set 'shape=plain' or 'shape=plaintext' <a
+# href=https://www.graphviz.org/doc/info/shapes.html>Shapes specification</a>
+# The default value is: shape=box,height=0.2,width=0.4.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NODE_ATTR          = "shape=box,height=0.2,width=0.4"
+
+# You can set the path where dot can find the font specified with fontname in
+# DOT_COMMON_ATTR and other dot attributes.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 DOT_FONTPATH           =
 
-# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
-# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# If the CLASS_GRAPH tag is set to YES or GRAPH or BUILTIN then doxygen will
+# generate a graph for each documented class showing the direct and indirect
+# inheritance relations. In case the CLASS_GRAPH tag is set to YES or GRAPH and
+# HAVE_DOT is enabled as well, then dot will be used to draw the graph. In case
+# the CLASS_GRAPH tag is set to YES and HAVE_DOT is disabled or if the
+# CLASS_GRAPH tag is set to BUILTIN, then the built-in generator will be used.
+# If the CLASS_GRAPH tag is set to TEXT the direct and indirect inheritance
+# relations will be shown as texts / links.
+# Possible values are: NO, YES, TEXT, GRAPH and BUILTIN.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
 
 CLASS_GRAPH            = YES
 
 # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
 # graph for each documented class showing the direct and indirect implementation
 # dependencies (inheritance, containment, and class references variables) of the
-# class with other documented classes.
+# class with other documented classes. Explicitly enabling a collaboration graph,
+# when COLLABORATION_GRAPH is set to NO, can be accomplished by means of the
+# command \collaborationgraph. Disabling a collaboration graph can be
+# accomplished by means of the command \hidecollaborationgraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 COLLABORATION_GRAPH    = YES
 
 # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
-# groups, showing the direct groups dependencies.
+# groups, showing the direct group dependencies. Explicitly enabling a group
+# dependency graph, when GROUP_GRAPHS is set to NO, can be accomplished by means
+# of the command \groupgraph. Disabling a group graph can be accomplished by
+# means of the command \hidegroupgraph. See also the chapter Grouping in the
+# manual.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2328,10 +2525,32 @@ UML_LOOK               = NO
 # but if the number exceeds 15, the total amount of fields shown is limited to
 # 10.
 # Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.
 
 UML_LIMIT_NUM_FIELDS   = 10
 
+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are
+# applied to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
 # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
 # collaboration graphs will show the relations between templates and their
 # instances.
@@ -2343,7 +2562,9 @@ TEMPLATE_RELATIONS     = NO
 # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
 # YES then doxygen will generate a graph for each documented file showing the
 # direct and indirect include dependencies of the file with other documented
-# files.
+# files. Explicitly enabling an include graph, when INCLUDE_GRAPH is set to NO,
+# can be accomplished by means of the command \includegraph. Disabling an
+# include graph can be accomplished by means of the command \hideincludegraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2352,7 +2573,10 @@ INCLUDE_GRAPH          = YES
 # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
 # set to YES then doxygen will generate a graph for each documented file showing
 # the direct and indirect include dependencies of the file with other documented
-# files.
+# files. Explicitly enabling an included by graph, when INCLUDED_BY_GRAPH is set
+# to NO, can be accomplished by means of the command \includedbygraph. Disabling
+# an included by graph can be accomplished by means of the command
+# \hideincludedbygraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2392,16 +2616,26 @@ GRAPHICAL_HIERARCHY    = YES
 # If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
 # dependencies a directory has on other directories in a graphical way. The
 # dependency relations are determined by the #include relations between the
-# files in the directories.
+# files in the directories. Explicitly enabling a directory graph, when
+# DIRECTORY_GRAPH is set to NO, can be accomplished by means of the command
+# \directorygraph. Disabling a directory graph can be accomplished by means of
+# the command \hidedirectorygraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 DIRECTORY_GRAPH        = YES
 
+# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels
+# of child directories generated in directory dependency graphs by dot.
+# Minimum value: 1, maximum value: 25, default value: 1.
+# This tag requires that the tag DIRECTORY_GRAPH is set to YES.
+
+DIR_GRAPH_MAX_DEPTH    = 1
+
 # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
 # generated by dot. For an explanation of the image formats see the section
 # output formats in the documentation of the dot tool (Graphviz (see:
-# http://www.graphviz.org/)).
+# https://www.graphviz.org/)).
 # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
 # to make the SVG files visible in IE 9+ (other browsers do not have this
 # requirement).
@@ -2438,11 +2672,12 @@ DOT_PATH               =
 
 DOTFILE_DIRS           =
 
-# The MSCFILE_DIRS tag can be used to specify one or more directories that
-# contain msc files that are included in the documentation (see the \mscfile
-# command).
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
 
-MSCFILE_DIRS           =
+DIA_PATH               =
 
 # The DIAFILE_DIRS tag can be used to specify one or more directories that
 # contain dia files that are included in the documentation (see the \diafile
@@ -2451,10 +2686,10 @@ MSCFILE_DIRS           =
 DIAFILE_DIRS           =
 
 # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
-# path where java can find the plantuml.jar file. If left blank, it is assumed
-# PlantUML is not used or called during a preprocessing step. Doxygen will
-# generate a warning when it encounters a \startuml command in this case and
-# will not generate output for the diagram.
+# path where java can find the plantuml.jar file or to the filename of jar file
+# to be used. If left blank, it is assumed PlantUML is not used or called during
+# a preprocessing step. Doxygen will generate a warning when it encounters a
+# \startuml command in this case and will not generate output for the diagram.
 
 PLANTUML_JAR_PATH      =
 
@@ -2492,18 +2727,6 @@ DOT_GRAPH_MAX_NODES    = 50
 
 MAX_DOT_GRAPH_DEPTH    = 0
 
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not seem
-# to support this out of the box.
-#
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_TRANSPARENT        = NO
-
 # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
 # files in one run (i.e. multiple -o and -T options on the command line). This
 # makes dot run faster, but since only newer versions of dot (>1.8.10) support
@@ -2516,14 +2739,34 @@ DOT_MULTI_TARGETS      = NO
 # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
 # explaining the meaning of the various boxes and arrows in the dot generated
 # graphs.
+# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal
+# graphical representation for inheritance and collaboration diagrams is used.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 GENERATE_LEGEND        = YES
 
-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
 # files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc temporary
+# files.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
 
 DOT_CLEANUP            = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. If the MSCGEN_TOOL tag is left empty (the default), then doxygen will
+# use a built-in version of mscgen tool to produce the charts. Alternatively,
+# the MSCGEN_TOOL tag can also specify the name of an external tool. For instance,
+# specifying prog as the value, doxygen will call the tool as prog -T
+# <outfile_format> -o <outputfile> <inputfile>. The external tool should support
+# output file formats "png", "eps", "svg", and "ismap".
+
+MSCGEN_TOOL            =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
diff --git a/cpp/include/cugraph/detail/collect_comm_wrapper.hpp b/cpp/include/cugraph/detail/collect_comm_wrapper.hpp
new file mode 100644
index 00000000000..b791c593f41
--- /dev/null
+++ b/cpp/include/cugraph/detail/collect_comm_wrapper.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/core/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <optional>
+
+namespace cugraph {
+namespace detail {
+
+/**
+ * @brief Gather the span of data from all ranks and broadcast the combined data to all ranks.
+ *
+ * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
+ * and handles to various CUDA libraries) to run graph algorithms.
+ * @param[in] comm Raft comms that manages underlying NCCL comms handles across the ranks.
+ * @param[in] d_input The span of data to perform the 'allgatherv'.
+ *
+ * @return A vector containing the combined data of all ranks.
+ */
+template <typename T>
+rmm::device_uvector<T> device_allgatherv(raft::handle_t const& handle,
+                                         raft::comms::comms_t const& comm,
+                                         raft::device_span<T const> d_input);
+
+}  // namespace detail
+}  // namespace cugraph
diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp
index 5c1e9d5311f..6a75a420bf8 100644
--- a/cpp/include/cugraph/graph_functions.hpp
+++ b/cpp/include/cugraph/graph_functions.hpp
@@ -973,4 +973,71 @@ renumber_sampled_edgelist(
     label_offsets,
   bool do_expensive_check = false);
 
+/**
+ * @brief Remove self loops from an edge list
+ *
+ * @tparam vertex_t    Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t      Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t    Type of edge weight. Currently float and double are supported.
+ * @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ *
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param edgelist_srcs  List of source vertex ids
+ * @param edgelist_dsts  List of destination vertex ids
+ * @param edgelist_weights  Optional list of edge weights
+ * @param edgelist_edge_ids  Optional list of edge ids
+ * @param edgelist_edge_types  Optional list of edge types
+ * @return Tuple of vectors storing edge sources, destinations, optional weights,
+ *    optional edge ids, optional edge types.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           std::optional<rmm::device_uvector<weight_t>>,
+           std::optional<rmm::device_uvector<edge_t>>,
+           std::optional<rmm::device_uvector<edge_type_t>>>
+remove_self_loops(raft::handle_t const& handle,
+                  rmm::device_uvector<vertex_t>&& edgelist_srcs,
+                  rmm::device_uvector<vertex_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types);
+
+/**
+ * @brief Remove all but one edge when a multi-edge exists.  Note that this function does not use
+ *    stable methods.  When a multi-edge exists, one of the edges will remain, but there is no
+ *    guarantee on which one will remain.
+ *
+ * In an MG context it is assumed that edges have been shuffled to the proper GPU,
+ * in which case any multi-edges will be on the same GPU.
+ *
+ * @tparam vertex_t    Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t      Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t    Type of edge weight. Currently float and double are supported.
+ * @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ *
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param edgelist_srcs  List of source vertex ids
+ * @param edgelist_dsts  List of destination vertex ids
+ * @param edgelist_weights  Optional list of edge weights
+ * @param edgelist_edge_ids  Optional list of edge ids
+ * @param edgelist_edge_types  Optional list of edge types
+ * @return Tuple of vectors storing edge sources, destinations, optional weights,
+ *    optional edge ids, optional edge types.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           std::optional<rmm::device_uvector<weight_t>>,
+           std::optional<rmm::device_uvector<edge_t>>,
+           std::optional<rmm::device_uvector<edge_type_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<vertex_t>&& edgelist_srcs,
+                   rmm::device_uvector<vertex_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types);
+
 }  // namespace cugraph
diff --git a/cpp/include/cugraph_c/centrality_algorithms.h b/cpp/include/cugraph_c/centrality_algorithms.h
index 0ac0e58540f..fb5d4b63b9c 100644
--- a/cpp/include/cugraph_c/centrality_algorithms.h
+++ b/cpp/include/cugraph_c/centrality_algorithms.h
@@ -23,8 +23,6 @@
 #include <cugraph_c/resource_handle.h>
 
 /** @defgroup centrality Centrality algorithms
- *  @ingroup c_api
- *  @{
  */
 
 #ifdef __cplusplus
@@ -39,7 +37,8 @@ typedef struct {
 } cugraph_centrality_result_t;
 
 /**
- * @brief     Get the vertex ids from the centrality result
+ * @ingroup centrality
+ * @brief   Get the vertex ids from the centrality result
  *
  * @param [in]   result   The result from a centrality algorithm
  * @return type erased array of vertex ids
@@ -48,7 +47,8 @@ cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_vertices(
   cugraph_centrality_result_t* result);
 
 /**
- * @brief     Get the centrality values from a centrality algorithm result
+ * @ingroup centrality
+ * @brief   Get the centrality values from a centrality algorithm result
  *
  * @param [in]   result   The result from a centrality algorithm
  * @return type erased array view of centrality values
@@ -57,6 +57,7 @@ cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_values(
   cugraph_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Get the number of iterations executed from the algorithm metadata
  *
  * @param [in]   result   The result from a centrality algorithm
@@ -65,6 +66,7 @@ cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_values(
 size_t cugraph_centrality_result_get_num_iterations(cugraph_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Returns true if the centrality algorithm converged
  *
  * @param [in]   result   The result from a centrality algorithm
@@ -73,6 +75,7 @@ size_t cugraph_centrality_result_get_num_iterations(cugraph_centrality_result_t*
 bool_t cugraph_centrality_result_converged(cugraph_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Free centrality result
  *
  * @param [in]   result   The result from a centrality algorithm
@@ -409,6 +412,7 @@ typedef struct {
 } cugraph_edge_centrality_result_t;
 
 /**
+ * @ingroup centrality
  * @brief     Get the src vertex ids from an edge centrality result
  *
  * @param [in]   result   The result from an edge centrality algorithm
@@ -418,6 +422,7 @@ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_src_
   cugraph_edge_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Get the dst vertex ids from an edge centrality result
  *
  * @param [in]   result   The result from an edge centrality algorithm
@@ -427,6 +432,7 @@ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_dst_
   cugraph_edge_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Get the edge ids from an edge centrality result
  *
  * @param [in]   result   The result from an edge centrality algorithm
@@ -436,6 +442,7 @@ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_edge
   cugraph_edge_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Get the centrality values from an edge centrality algorithm result
  *
  * @param [in]   result   The result from an edge centrality algorithm
@@ -445,6 +452,7 @@ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_valu
   cugraph_edge_centrality_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Free centrality result
  *
  * @param [in]   result   The result from a centrality algorithm
@@ -491,6 +499,7 @@ typedef struct {
 } cugraph_hits_result_t;
 
 /**
+ * @ingroup centrality
  * @brief     Get the vertex ids from the hits result
  *
  * @param [in]   result   The result from hits
@@ -500,6 +509,7 @@ cugraph_type_erased_device_array_view_t* cugraph_hits_result_get_vertices(
   cugraph_hits_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Get the hubs values from the hits result
  *
  * @param [in]   result   The result from hits
@@ -509,6 +519,7 @@ cugraph_type_erased_device_array_view_t* cugraph_hits_result_get_hubs(
   cugraph_hits_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Get the authorities values from the hits result
  *
  * @param [in]   result   The result from hits
@@ -518,6 +529,7 @@ cugraph_type_erased_device_array_view_t* cugraph_hits_result_get_authorities(
   cugraph_hits_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief   Get the score differences between the last two iterations
  *
  * @param [in]   result   The result from hits
@@ -526,6 +538,7 @@ cugraph_type_erased_device_array_view_t* cugraph_hits_result_get_authorities(
 double cugraph_hits_result_get_hub_score_differences(cugraph_hits_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief   Get the actual number of iterations
  *
  * @param [in]   result   The result from hits
@@ -534,6 +547,7 @@ double cugraph_hits_result_get_hub_score_differences(cugraph_hits_result_t* resu
 size_t cugraph_hits_result_get_number_of_iterations(cugraph_hits_result_t* result);
 
 /**
+ * @ingroup centrality
  * @brief     Free hits result
  *
  * @param [in]   result   The result from hits
@@ -585,7 +599,3 @@ cugraph_error_code_t cugraph_hits(
 #ifdef __cplusplus
 }
 #endif
-
-/**
- *  @}
- */
diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h
index 8f1015f8632..feab15c7eeb 100644
--- a/cpp/include/cugraph_c/community_algorithms.h
+++ b/cpp/include/cugraph_c/community_algorithms.h
@@ -23,7 +23,6 @@
 #include <cugraph_c/resource_handle.h>
 
 /** @defgroup community Community algorithms
- *  @ingroup c_api
  *  @{
  */
 
diff --git a/cpp/include/cugraph_c/core_algorithms.h b/cpp/include/cugraph_c/core_algorithms.h
index c0e348c3cf4..6db3269f61e 100644
--- a/cpp/include/cugraph_c/core_algorithms.h
+++ b/cpp/include/cugraph_c/core_algorithms.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,6 +21,9 @@
 #include <cugraph_c/graph.h>
 #include <cugraph_c/resource_handle.h>
 
+/** @defgroup core Core algorithms
+ */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -40,6 +43,7 @@ typedef struct {
 } cugraph_k_core_result_t;
 
 /**
+ * @ingroup core
  * @brief       Create a core_number result (in case it was previously extracted)
  *
  * @param [in]  handle       Handle for accessing resources
@@ -58,6 +62,7 @@ cugraph_error_code_t cugraph_core_result_create(
   cugraph_error_t** error);
 
 /**
+ * @ingroup core
  * @brief       Get the vertex ids from the core result
  *
  * @param [in]     result   The result from core number
@@ -67,6 +72,7 @@ cugraph_type_erased_device_array_view_t* cugraph_core_result_get_vertices(
   cugraph_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief       Get the core numbers from the core result
  *
  * @param [in]    result    The result from core number
@@ -76,6 +82,7 @@ cugraph_type_erased_device_array_view_t* cugraph_core_result_get_core_numbers(
   cugraph_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief     Free core result
  *
  * @param [in]    result    The result from core number
@@ -83,6 +90,7 @@ cugraph_type_erased_device_array_view_t* cugraph_core_result_get_core_numbers(
 void cugraph_core_result_free(cugraph_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief       Get the src vertex ids from the k-core result
  *
  * @param [in]     result   The result from k-core
@@ -92,6 +100,7 @@ cugraph_type_erased_device_array_view_t* cugraph_k_core_result_get_src_vertices(
   cugraph_k_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief       Get the dst vertex ids from the k-core result
  *
  * @param [in]     result   The result from k-core
@@ -101,6 +110,7 @@ cugraph_type_erased_device_array_view_t* cugraph_k_core_result_get_dst_vertices(
   cugraph_k_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief       Get the weights from the k-core result
  *
  * Returns NULL if the graph is unweighted
@@ -112,6 +122,7 @@ cugraph_type_erased_device_array_view_t* cugraph_k_core_result_get_weights(
   cugraph_k_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief     Free k-core result
  *
  * @param [in]    result    The result from k-core
@@ -119,6 +130,7 @@ cugraph_type_erased_device_array_view_t* cugraph_k_core_result_get_weights(
 void cugraph_k_core_result_free(cugraph_k_core_result_t* result);
 
 /**
+ * @ingroup core
  * @brief     Enumeration for computing core number
  */
 typedef enum {
diff --git a/cpp/include/cugraph_c/graph.h b/cpp/include/cugraph_c/graph.h
index e910d8b1244..88176a9c1b6 100644
--- a/cpp/include/cugraph_c/graph.h
+++ b/cpp/include/cugraph_c/graph.h
@@ -35,10 +35,11 @@ typedef struct {
   bool_t is_multigraph;
 } cugraph_graph_properties_t;
 
-// FIXME: Add support for specifying isolated vertices
 /**
  * @brief     Construct an SG graph
  *
+ * @deprecated  This API will be deleted, use cugraph_graph_create_sg instead
+ *
  * @param [in]  handle         Handle for accessing resources
  * @param [in]  properties     Properties of the constructed graph
  * @param [in]  src            Device array containing the source vertex ids.
@@ -51,11 +52,11 @@ typedef struct {
                                argument that can be NULL if edge types are not used.
  * @param [in]  store_transposed If true create the graph initially in transposed format
  * @param [in]  renumber       If true, renumber vertices to make an efficient data structure.
- *    If false, do not renumber.  Renumbering is required if the vertices are not sequential
- *    integer values from 0 to num_vertices.
+ *    If false, do not renumber.  Renumbering enables some significant optimizations within
+ *    the graph primitives library, so it is strongly encouraged.  Renumbering is required if
+ *    the vertices are not sequential integer values from 0 to num_vertices.
  * @param [in]  do_expensive_check    If true, do expensive checks to validate the input data
  *    is consistent with software assumptions.  If false bypass these checks.
- * @param [in]  properties     Properties of the graph
  * @param [out] graph          A pointer to the graph object
  * @param [out] error          Pointer to an error object storing details of any error.  Will
  *                             be populated if error code is not CUGRAPH_SUCCESS
@@ -76,9 +77,63 @@ cugraph_error_code_t cugraph_sg_graph_create(
   cugraph_graph_t** graph,
   cugraph_error_t** error);
 
+/**
+ * @brief     Construct an SG graph
+ *
+ * @param [in]  handle         Handle for accessing resources
+ * @param [in]  properties     Properties of the constructed graph
+ * @param [in]  vertices       Optional device array containing a list of vertex ids
+ *                             (specify NULL if we should create vertex ids from the
+ *                             unique contents of @p src and @p dst)
+ * @param [in]  src            Device array containing the source vertex ids.
+ * @param [in]  dst            Device array containing the destination vertex ids
+ * @param [in]  weights        Device array containing the edge weights.  Note that an unweighted
+ *                             graph can be created by passing weights == NULL.
+ * @param [in]  edge_ids       Device array containing the edge ids for each edge.  Optional
+                               argument that can be NULL if edge ids are not used.
+ * @param [in]  edge_type_ids  Device array containing the edge types for each edge.  Optional
+                               argument that can be NULL if edge types are not used.
+ * @param [in]  store_transposed If true create the graph initially in transposed format
+ * @param [in]  renumber       If true, renumber vertices to make an efficient data structure.
+ *    If false, do not renumber.  Renumbering enables some significant optimizations within
+ *    the graph primitives library, so it is strongly encouraged.  Renumbering is required if
+ *    the vertices are not sequential integer values from 0 to num_vertices.
+ * @param [in]  drop_self_loops  If true, drop any self loops that exist in the provided edge list.
+ * @param [in]  drop_multi_edges If true, drop any multi edges that exist in the provided edge list.
+ *    Note that setting this flag will arbitrarily select one instance of a multi edge to be the
+ *    edge that survives.  If the edges have properties that should be honored (e.g. sum the
+ *    weights, or take the maximum weight), the caller should do that and not rely on this
+ *    flag.
+ * @param [in]  do_expensive_check    If true, do expensive checks to validate the input data
+ *    is consistent with software assumptions.  If false bypass these checks.
+ * @param [out] graph          A pointer to the graph object
+ * @param [out] error          Pointer to an error object storing details of any error.  Will
+ *                             be populated if error code is not CUGRAPH_SUCCESS
+ *
+ * @return error code
+ */
+cugraph_error_code_t cugraph_graph_create_sg(
+  const cugraph_resource_handle_t* handle,
+  const cugraph_graph_properties_t* properties,
+  const cugraph_type_erased_device_array_view_t* vertices,
+  const cugraph_type_erased_device_array_view_t* src,
+  const cugraph_type_erased_device_array_view_t* dst,
+  const cugraph_type_erased_device_array_view_t* weights,
+  const cugraph_type_erased_device_array_view_t* edge_ids,
+  const cugraph_type_erased_device_array_view_t* edge_type_ids,
+  bool_t store_transposed,
+  bool_t renumber,
+  bool_t drop_self_loops,
+  bool_t drop_multi_edges,
+  bool_t do_expensive_check,
+  cugraph_graph_t** graph,
+  cugraph_error_t** error);
+
 /**
  * @brief     Construct an SG graph from a CSR input
  *
+ * @deprecated  This API will be deleted, use cugraph_graph_create_sg_from_csr instead
+ *
  * @param [in]  handle         Handle for accessing resources
  * @param [in]  properties     Properties of the constructed graph
  * @param [in]  offsets        Device array containing the CSR offsets array
@@ -91,11 +146,11 @@ cugraph_error_code_t cugraph_sg_graph_create(
                                argument that can be NULL if edge types are not used.
  * @param [in]  store_transposed If true create the graph initially in transposed format
  * @param [in]  renumber       If true, renumber vertices to make an efficient data structure.
- *    If false, do not renumber.  Renumbering is required if the vertices are not sequential
- *    integer values from 0 to num_vertices.
+ *    If false, do not renumber.  Renumbering enables some significant optimizations within
+ *    the graph primitives library, so it is strongly encouraged.  Renumbering is required if
+ *    the vertices are not sequential integer values from 0 to num_vertices.
  * @param [in]  do_expensive_check    If true, do expensive checks to validate the input data
  *    is consistent with software assumptions.  If false bypass these checks.
- * @param [in]  properties     Properties of the graph
  * @param [out] graph          A pointer to the graph object
  * @param [out] error          Pointer to an error object storing details of any error.  Will
  *                             be populated if error code is not CUGRAPH_SUCCESS
@@ -117,18 +172,50 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr(
   cugraph_error_t** error);
 
 /**
- * @brief     Destroy an SG graph
+ * @brief     Construct an SG graph from a CSR input
  *
- * @param [in]  graph  A pointer to the graph object to destroy
+ * @param [in]  handle         Handle for accessing resources
+ * @param [in]  properties     Properties of the constructed graph
+ * @param [in]  offsets        Device array containing the CSR offsets array
+ * @param [in]  indices        Device array containing the destination vertex ids
+ * @param [in]  weights        Device array containing the edge weights.  Note that an unweighted
+ *                             graph can be created by passing weights == NULL.
+ * @param [in]  edge_ids       Device array containing the edge ids for each edge.  Optional
+ *                             argument that can be NULL if edge ids are not used.
+ * @param [in]  edge_type_ids  Device array containing the edge types for each edge.  Optional
+ *                             argument that can be NULL if edge types are not used.
+ * @param [in]  store_transposed If true create the graph initially in transposed format
+ * @param [in]  renumber       If true, renumber vertices to make an efficient data structure.
+ *    If false, do not renumber.  Renumbering enables some significant optimizations within
+ *    the graph primitives library, so it is strongly encouraged.  Renumbering is required if
+ *    the vertices are not sequential integer values from 0 to num_vertices.
+ * @param [in]  do_expensive_check    If true, do expensive checks to validate the input data
+ *    is consistent with software assumptions.  If false bypass these checks.
+ * @param [out] graph          A pointer to the graph object
+ * @param [out] error          Pointer to an error object storing details of any error.  Will
+ *                             be populated if error code is not CUGRAPH_SUCCESS
+ *
+ * @return error code
  */
-// FIXME:  This should probably just be cugraph_graph_free
-//         but didn't want to confuse with original cugraph_free_graph
-void cugraph_sg_graph_free(cugraph_graph_t* graph);
+cugraph_error_code_t cugraph_graph_create_sg_from_csr(
+  const cugraph_resource_handle_t* handle,
+  const cugraph_graph_properties_t* properties,
+  const cugraph_type_erased_device_array_view_t* offsets,
+  const cugraph_type_erased_device_array_view_t* indices,
+  const cugraph_type_erased_device_array_view_t* weights,
+  const cugraph_type_erased_device_array_view_t* edge_ids,
+  const cugraph_type_erased_device_array_view_t* edge_type_ids,
+  bool_t store_transposed,
+  bool_t renumber,
+  bool_t do_expensive_check,
+  cugraph_graph_t** graph,
+  cugraph_error_t** error);
 
-// FIXME: Add support for specifying isolated vertices
 /**
  * @brief     Construct an MG graph
  *
+ * @deprecated  This API will be deleted, use cugraph_graph_create_mg instead
+ *
  * @param [in]  handle          Handle for accessing resources
  * @param [in]  properties      Properties of the constructed graph
  * @param [in]  src             Device array containing the source vertex ids
@@ -165,13 +252,89 @@ cugraph_error_code_t cugraph_mg_graph_create(
   cugraph_graph_t** graph,
   cugraph_error_t** error);
 
+/**
+ * @brief     Construct an MG graph
+ *
+ * @param [in]  handle          Handle for accessing resources
+ * @param [in]  properties      Properties of the constructed graph
+ * @param [in]  vertices        List of device arrays containing the unique vertex ids.
+ *                              If NULL we will construct this internally using the unique
+ *                              entries specified in src and dst
+ *                              All entries in this list will be concatenated on this GPU
+ *                              into a single array.
+ * @param [in]  src             List of device array containing the source vertex ids
+ *                              All entries in this list will be concatenated on this GPU
+ *                              into a single array.
+ * @param [in]  dst             List of device array containing the destination vertex ids
+ *                              All entries in this list will be concatenated on this GPU
+ *                              into a single array.
+ * @param [in]  weights         List of device array containing the edge weights.  Note that an
+ * unweighted graph can be created by passing weights == NULL.  If a weighted graph is to be
+ * created, the weights device array should be created on each rank, but the pointer can be NULL and
+ * the size 0 if there are no inputs provided by this rank.  All entries in this list will be
+ * concatenated on this GPU into a single array.
+ * @param [in]  edge_ids        List of device array containing the edge ids for each edge. Optional
+ *                              argument that can be NULL if edge ids are not used.
+ *                              All entries in this list will be concatenated on this GPU
+ *                              into a single array.
+ * @param [in]  edge_type_ids   List of device array containing the edge types for each edge.
+ * Optional argument that can be NULL if edge types are not used. All entries in this list will be
+ * concatenated on this GPU into a single array.
+ * @param [in]  store_transposed If true create the graph initially in transposed format
+ * @param [in]  num_arrays      The number of arrays specified in @p vertices, @p src, @p dst, @p
+ *                              weights, @p edge_ids and @p edge_type_ids
+ * @param [in]  drop_self_loops  If true, drop any self loops that exist in the provided edge list.
+ * @param [in]  drop_multi_edges If true, drop any multi edges that exist in the provided edge list.
+ *    Note that setting this flag will arbitrarily select one instance of a multi edge to be the
+ *    edge that survives.  If the edges have properties that should be honored (e.g. sum the
+ * weights, or take the maximum weight), the caller should do that and not rely on this flag.
+ * @param [in]  do_expensive_check  If true, do expensive checks to validate the input data
+ *    is consistent with software assumptions.  If false bypass these checks.
+ * @param [out] graph           A pointer to the graph object
+ * @param [out] error           Pointer to an error object storing details of any error.  Will
+ *                              be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_graph_create_mg(
+  cugraph_resource_handle_t const* handle,
+  cugraph_graph_properties_t const* properties,
+  cugraph_type_erased_device_array_view_t const* const* vertices,
+  cugraph_type_erased_device_array_view_t const* const* src,
+  cugraph_type_erased_device_array_view_t const* const* dst,
+  cugraph_type_erased_device_array_view_t const* const* weights,
+  cugraph_type_erased_device_array_view_t const* const* edge_ids,
+  cugraph_type_erased_device_array_view_t const* const* edge_type_ids,
+  bool_t store_transposed,
+  size_t num_arrays,
+  bool_t drop_self_loops,
+  bool_t drop_multi_edges,
+  bool_t do_expensive_check,
+  cugraph_graph_t** graph,
+  cugraph_error_t** error);
+
+/**
+ * @brief     Destroy a graph
+ *
+ * @param [in]  graph  A pointer to the graph object to destroy
+ */
+void cugraph_graph_free(cugraph_graph_t* graph);
+
+/**
+ * @brief     Destroy an SG graph
+ *
+ * @deprecated  This API will be deleted, use cugraph_graph_free instead
+ *
+ * @param [in]  graph  A pointer to the graph object to destroy
+ */
+void cugraph_sg_graph_free(cugraph_graph_t* graph);
+
 /**
  * @brief     Destroy an MG graph
  *
+ * @deprecated  This API will be deleted, use cugraph_graph_free instead
+ *
  * @param [in]  graph  A pointer to the graph object to destroy
  */
-// FIXME:  This should probably just be cugraph_graph_free
-//         but didn't want to confuse with original cugraph_free_graph
 void cugraph_mg_graph_free(cugraph_graph_t* graph);
 
 /**
diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h
index 655324df284..19b69922fa5 100644
--- a/cpp/include/cugraph_c/graph_functions.h
+++ b/cpp/include/cugraph_c/graph_functions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -136,6 +136,24 @@ cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_destinatio
 cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_weights(
   cugraph_induced_subgraph_result_t* induced_subgraph);
 
+/**
+ * @brief       Get the edge ids
+ *
+ * @param [in]     induced_subgraph   Opaque pointer to induced subgraph
+ * @return type erased array view of edge ids
+ */
+cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_ids(
+  cugraph_induced_subgraph_result_t* induced_subgraph);
+
+/**
+ * @brief       Get the edge types
+ *
+ * @param [in]     induced_subgraph   Opaque pointer to induced subgraph
+ * @return type erased array view of edge types
+ */
+cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_type_ids(
+  cugraph_induced_subgraph_result_t* induced_subgraph);
+
 /**
  * @brief       Get the subgraph offsets
  *
@@ -184,6 +202,33 @@ cugraph_error_code_t cugraph_extract_induced_subgraph(
   cugraph_induced_subgraph_result_t** result,
   cugraph_error_t** error);
 
+// FIXME: Rename the return type
+/**
+ * @brief      Gather edgelist
+ *
+ * This function collects the edgelist from all ranks and stores the combined edgelist
+ * in each rank
+ *
+ * @param [in]  handle            Handle for accessing resources.
+ * @param [in]  src               Device array containing the source vertex ids.
+ * @param [in]  dst               Device array containing the destination vertex ids
+ * @param [in]  weights           Optional device array containing the edge weights
+ * @param [in]  edge_ids          Optional device array containing the edge ids for each edge.
+ * @param [in]  edge_type_ids     Optional device array containing the edge types for each edge
+ * @param [out] result            Opaque pointer to gathered edgelist result
+ * @param [out] error             Pointer to an error object storing details of any error.  Will
+ *                                be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_allgather(const cugraph_resource_handle_t* handle,
+                                       const cugraph_type_erased_device_array_view_t* src,
+                                       const cugraph_type_erased_device_array_view_t* dst,
+                                       const cugraph_type_erased_device_array_view_t* weights,
+                                       const cugraph_type_erased_device_array_view_t* edge_ids,
+                                       const cugraph_type_erased_device_array_view_t* edge_type_ids,
+                                       cugraph_induced_subgraph_result_t** result,
+                                       cugraph_error_t** error);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/cpp/include/cugraph_c/labeling_algorithms.h b/cpp/include/cugraph_c/labeling_algorithms.h
index f3e634dafe6..53dcc0d9419 100644
--- a/cpp/include/cugraph_c/labeling_algorithms.h
+++ b/cpp/include/cugraph_c/labeling_algorithms.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,8 +25,6 @@ extern "C" {
 #endif
 
 /** @defgroup labeling Labeling algorithms
- *  @ingroup c_api
- *  @{
  */
 
 /**
@@ -37,6 +35,7 @@ typedef struct {
 } cugraph_labeling_result_t;
 
 /**
+ * @ingroup labeling
  * @brief     Get the vertex ids from the labeling result
  *
  * @param [in]   result   The result from a labeling algorithm
@@ -46,6 +45,7 @@ cugraph_type_erased_device_array_view_t* cugraph_labeling_result_get_vertices(
   cugraph_labeling_result_t* result);
 
 /**
+ * @ingroup labeling
  * @brief     Get the label values from the labeling result
  *
  * @param [in]   result   The result from a labeling algorithm
@@ -55,6 +55,7 @@ cugraph_type_erased_device_array_view_t* cugraph_labeling_result_get_labels(
   cugraph_labeling_result_t* result);
 
 /**
+ * @ingroup labeling
  * @brief     Free labeling result
  *
  * @param [in]   result   The result from a labeling algorithm
@@ -104,7 +105,3 @@ cugraph_error_code_t cugraph_strongly_connected_components(const cugraph_resourc
 #ifdef __cplusplus
 }
 #endif
-
-/**
- *  @}
- */
diff --git a/cpp/include/cugraph_c/resource_handle.h b/cpp/include/cugraph_c/resource_handle.h
index a239c24afe9..0e45102aae2 100644
--- a/cpp/include/cugraph_c/resource_handle.h
+++ b/cpp/include/cugraph_c/resource_handle.h
@@ -57,6 +57,18 @@ typedef struct cugraph_resource_handle_ {
  */
 cugraph_resource_handle_t* cugraph_create_resource_handle(void* raft_handle);
 
+/**
+ * @brief get comm_size from resource handle
+ *
+ * If the resource handle has been configured for multi-gpu, this will return
+ * the comm_size for this cluster.  If the resource handle has not been configured for
+ * multi-gpu this will always return 1.
+ *
+ * @param [in]  handle          Handle for accessing resources
+ * @return comm_size
+ */
+int cugraph_resource_handle_get_comm_size(const cugraph_resource_handle_t* handle);
+
 /**
  * @brief get rank from resource handle
  *
diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h
index 92fe50ef622..782bb5a3790 100644
--- a/cpp/include/cugraph_c/sampling_algorithms.h
+++ b/cpp/include/cugraph_c/sampling_algorithms.h
@@ -21,8 +21,7 @@
 #include <cugraph_c/random.h>
 #include <cugraph_c/resource_handle.h>
 
-/** @defgroup sampling Sampling algorithms
- *  @ingroup c_api
+/** @defgroup samplingC Sampling algorithms
  *  @{
  */
 
diff --git a/cpp/include/cugraph_c/similarity_algorithms.h b/cpp/include/cugraph_c/similarity_algorithms.h
index 1417d8ac566..b8f61b46545 100644
--- a/cpp/include/cugraph_c/similarity_algorithms.h
+++ b/cpp/include/cugraph_c/similarity_algorithms.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,6 +22,9 @@
 #include <cugraph_c/graph_functions.h>
 #include <cugraph_c/resource_handle.h>
 
+/** @defgroup similarity Similarity algorithms
+ */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -34,6 +37,7 @@ typedef struct {
 } cugraph_similarity_result_t;
 
 /**
+ * @ingroup similarity
  * @brief       Get the similarity coefficient array
  *
  * @param [in]     result   The result from a similarity algorithm
@@ -43,6 +47,7 @@ cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarit
   cugraph_similarity_result_t* result);
 
 /**
+ * @ingroup similarity
  * @brief     Free similarity result
  *
  * @param [in]    result    The result from a similarity algorithm
diff --git a/cpp/src/c_api/allgather.cpp b/cpp/src/c_api/allgather.cpp
new file mode 100644
index 00000000000..7ef401aa6b7
--- /dev/null
+++ b/cpp/src/c_api/allgather.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cugraph_c/algorithms.h>
+#include <cugraph_c/graph_generators.h>
+
+#include <c_api/abstract_functor.hpp>
+#include <c_api/graph.hpp>
+#include <c_api/induced_subgraph_result.hpp>
+#include <c_api/resource_handle.hpp>
+#include <c_api/utils.hpp>
+
+#include <cugraph/algorithms.hpp>
+#include <cugraph/detail/collect_comm_wrapper.hpp>
+#include <cugraph/detail/shuffle_wrappers.hpp>
+#include <cugraph/detail/utility_wrappers.hpp>
+#include <cugraph/graph_functions.hpp>
+
+namespace {
+
+// Functor that copies each non-NULL input view into a device buffer and all-gathers it over
+// the communicator of handle_; the gathered arrays are returned through result_.
+struct create_allgather_functor : public cugraph::c_api::abstract_functor {
+  raft::handle_t const& handle_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* src_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids_;
+  cugraph::c_api::cugraph_induced_subgraph_result_t* result_{};
+
+  create_allgather_functor(
+    raft::handle_t const& handle,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* src,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids)
+    : abstract_functor(),
+      handle_(handle),
+      src_(src),
+      dst_(dst),
+      weights_(weights),
+      edge_ids_(edge_ids),
+      edge_type_ids_(edge_type_ids)
+  {
+  }
+
+  template <typename vertex_t,
+            typename edge_t,
+            typename weight_t,
+            typename edge_type_id_t,
+            bool store_transposed,
+            bool multi_gpu>
+  void operator()()
+  {
+    std::optional<rmm::device_uvector<vertex_t>> edgelist_srcs{std::nullopt};
+    if (src_) {
+      edgelist_srcs = rmm::device_uvector<vertex_t>(src_->size_, handle_.get_stream());
+      raft::copy(
+        edgelist_srcs->data(), src_->as_type<vertex_t>(), src_->size_, handle_.get_stream());
+    }
+
+    std::optional<rmm::device_uvector<vertex_t>> edgelist_dsts{std::nullopt};
+    if (dst_) {
+      edgelist_dsts = rmm::device_uvector<vertex_t>(dst_->size_, handle_.get_stream());
+      raft::copy(
+        edgelist_dsts->data(), dst_->as_type<vertex_t>(), dst_->size_, handle_.get_stream());
+    }
+
+    std::optional<rmm::device_uvector<weight_t>> edgelist_weights{std::nullopt};
+    if (weights_) {
+      edgelist_weights = rmm::device_uvector<weight_t>(weights_->size_, handle_.get_stream());
+      raft::copy(edgelist_weights->data(),
+                 weights_->as_type<weight_t>(),
+                 weights_->size_,
+                 handle_.get_stream());
+    }
+
+    std::optional<rmm::device_uvector<edge_t>> edgelist_ids{std::nullopt};
+    if (edge_ids_) {
+      edgelist_ids = rmm::device_uvector<edge_t>(edge_ids_->size_, handle_.get_stream());
+      raft::copy(
+        edgelist_ids->data(), edge_ids_->as_type<edge_t>(), edge_ids_->size_, handle_.get_stream());
+    }
+
+    std::optional<rmm::device_uvector<edge_type_id_t>> edgelist_type_ids{std::nullopt};
+    if (edge_type_ids_) {
+      edgelist_type_ids =
+        rmm::device_uvector<edge_type_id_t>(edge_type_ids_->size_, handle_.get_stream());
+      raft::copy(edgelist_type_ids->data(),
+                 edge_type_ids_->as_type<edge_type_id_t>(),
+                 edge_type_ids_->size_,
+                 handle_.get_stream());
+    }
+
+    auto& comm = handle_.get_comms();  // communicator used by every all-gather below
+
+    if (edgelist_srcs) {
+      edgelist_srcs = cugraph::detail::device_allgatherv(
+        handle_,
+        comm,
+        raft::device_span<vertex_t const>(edgelist_srcs->data(), edgelist_srcs->size()));
+    }
+
+    if (edgelist_dsts) {
+      edgelist_dsts = cugraph::detail::device_allgatherv(
+        handle_,
+        comm,
+        raft::device_span<vertex_t const>(edgelist_dsts->data(), edgelist_dsts->size()));
+    }
+
+    if (edgelist_weights) {
+      edgelist_weights = cugraph::detail::device_allgatherv(
+        handle_,
+        comm,
+        raft::device_span<weight_t const>(edgelist_weights->data(), edgelist_weights->size()));
+    }
+
+    if (edgelist_ids) {
+      edgelist_ids = cugraph::detail::device_allgatherv(
+        handle_, comm, raft::device_span<edge_t const>(edgelist_ids->data(), edgelist_ids->size()));
+    }
+
+    if (edgelist_type_ids) {
+      edgelist_type_ids =
+        cugraph::detail::device_allgatherv(handle_,
+                                           comm,
+                                           raft::device_span<edge_type_id_t const>(
+                                             edgelist_type_ids->data(), edgelist_type_ids->size()));
+    }
+
+    // The result carries one [0, n) offsets range.  Take n from the gathered arrays so it
+    // matches what is returned; fall back to 0 when neither src nor weights was supplied.
+    size_t num_edges = edgelist_srcs ? edgelist_srcs->size()
+                                     : (edgelist_weights ? edgelist_weights->size() : size_t{0});
+    std::vector<size_t> h_edge_offsets{{0, num_edges}};
+    rmm::device_uvector<size_t> edge_offsets(2, handle_.get_stream());
+    raft::update_device(
+      edge_offsets.data(), h_edge_offsets.data(), h_edge_offsets.size(), handle_.get_stream());
+
+    result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{
+      edgelist_srcs
+        ? new cugraph::c_api::cugraph_type_erased_device_array_t(*edgelist_srcs, src_->type_)
+        : NULL,
+      edgelist_dsts
+        ? new cugraph::c_api::cugraph_type_erased_device_array_t(*edgelist_dsts, dst_->type_)
+        : NULL,
+      edgelist_weights
+        ? new cugraph::c_api::cugraph_type_erased_device_array_t(*edgelist_weights, weights_->type_)
+        : NULL,
+      edgelist_ids
+        ? new cugraph::c_api::cugraph_type_erased_device_array_t(*edgelist_ids, edge_ids_->type_)
+        : NULL,
+      edgelist_type_ids ? new cugraph::c_api::cugraph_type_erased_device_array_t(
+                            *edgelist_type_ids, edge_type_ids_->type_)
+                        : NULL,
+      new cugraph::c_api::cugraph_type_erased_device_array_t(edge_offsets,
+                                                             cugraph_data_type_id_t::SIZE_T)};
+  }
+};
+
+}  // namespace
+
+extern "C" cugraph_error_code_t cugraph_allgather(
+  const cugraph_resource_handle_t* handle,
+  const cugraph_type_erased_device_array_view_t* src,
+  const cugraph_type_erased_device_array_view_t* dst,
+  const cugraph_type_erased_device_array_view_t* weights,
+  const cugraph_type_erased_device_array_view_t* edge_ids,
+  const cugraph_type_erased_device_array_view_t* edge_type_ids,
+  cugraph_induced_subgraph_result_t** edgelist,
+  cugraph_error_t** error)
+{  // Validate the optional edge list views, then all-gather them via create_allgather_functor.
+  *edgelist = nullptr;  // clear both outputs before validating anything
+  *error    = nullptr;
+  // Reinterpret the opaque C API pointers as the internal cugraph::c_api types.
+  auto p_handle = reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle);
+  auto p_src =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(src);
+  auto p_dst =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(dst);
+  auto p_weights =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(weights);
+
+  auto p_edge_ids =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(edge_ids);
+
+  auto p_edge_type_ids =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(edge_type_ids);
+  // src/dst size and type are only compared when both arrays are supplied.
+  CAPI_EXPECTS((dst == nullptr) || (src == nullptr) || p_src->size_ == p_dst->size_,
+               CUGRAPH_INVALID_INPUT,
+               "Invalid input arguments: src size != dst size.",
+               *error);
+  CAPI_EXPECTS((dst == nullptr) || (src == nullptr) || p_src->type_ == p_dst->type_,
+               CUGRAPH_INVALID_INPUT,
+               "Invalid input arguments: src type != dst type.",
+               *error);
+
+  CAPI_EXPECTS((weights == nullptr) || (src == nullptr) || (p_weights->size_ == p_src->size_),
+               CUGRAPH_INVALID_INPUT,
+               "Invalid input arguments: src size != weights size.",
+               *error);
+  // Element types passed to the dispatcher; an absent array falls back to a default type.
+  cugraph_data_type_id_t vertex_type;
+  cugraph_data_type_id_t edge_type;
+  cugraph_data_type_id_t weight_type;
+  cugraph_data_type_id_t edge_type_id_type;
+
+  if (src != nullptr) {
+    vertex_type = p_src->type_;
+  } else {
+    vertex_type = cugraph_data_type_id_t::INT32;
+  }
+
+  if (weights != nullptr) {
+    weight_type = p_weights->type_;
+  } else {
+    weight_type = cugraph_data_type_id_t::FLOAT32;
+  }
+
+  if (edge_ids != nullptr) {
+    edge_type = p_edge_ids->type_;
+  } else {
+    edge_type = cugraph_data_type_id_t::INT32;
+  }
+
+  if (edge_type_ids != nullptr) {
+    edge_type_id_type = p_edge_type_ids->type_;
+  } else {
+    edge_type_id_type = cugraph_data_type_id_t::INT32;
+  }
+  // Edge id and edge type lengths are checked against src only when src is supplied.
+  if (src != nullptr) {
+    CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids->size_ == p_src->size_),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src size != edge id prop size",
+                 *error);
+
+    CAPI_EXPECTS((edge_type_ids == nullptr) || (p_edge_type_ids->size_ == p_src->size_),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src size != edge type prop size",
+                 *error);
+  }
+  // NOTE(review): multi_gpu is false even though the functor all-gathers — confirm intent.
+  constexpr bool multi_gpu        = false;
+  constexpr bool store_transposed = false;
+
+  ::create_allgather_functor functor(
+    *p_handle->handle_, p_src, p_dst, p_weights, p_edge_ids, p_edge_type_ids);
+  // Dispatch on the chosen types; a functor error or an exception is reported through *error.
+  try {
+    cugraph::c_api::vertex_dispatcher(
+      vertex_type, edge_type, weight_type, edge_type_id_type, store_transposed, multi_gpu, functor);
+
+    if (functor.error_code_ != CUGRAPH_SUCCESS) {
+      *error = reinterpret_cast<cugraph_error_t*>(functor.error_.release());
+      return functor.error_code_;
+    }
+
+    *edgelist = reinterpret_cast<cugraph_induced_subgraph_result_t*>(functor.result_);
+  } catch (std::exception const& ex) {
+    *error = reinterpret_cast<cugraph_error_t*>(new cugraph::c_api::cugraph_error_t{ex.what()});
+    return CUGRAPH_UNKNOWN_ERROR;  // the exception's what() text is carried in *error
+  }
+
+  return CUGRAPH_SUCCESS;
+}
diff --git a/cpp/src/c_api/extract_ego.cpp b/cpp/src/c_api/extract_ego.cpp
index 8f510b79023..931d58b5185 100644
--- a/cpp/src/c_api/extract_ego.cpp
+++ b/cpp/src/c_api/extract_ego.cpp
@@ -135,6 +135,8 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor {
         new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_),
         wgt ? new cugraph::c_api::cugraph_type_erased_device_array_t(*wgt, graph_->weight_type_)
             : NULL,
+        NULL,
+        NULL,
         new cugraph::c_api::cugraph_type_erased_device_array_t(edge_offsets,
                                                                cugraph_data_type_id_t::SIZE_T)};
     }
diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp
index f50c7c08fb6..5413949e3a3 100644
--- a/cpp/src/c_api/graph_mg.cpp
+++ b/cpp/src/c_api/graph_mg.cpp
@@ -31,40 +31,85 @@
 
 namespace {
 
+template <typename value_t>
+rmm::device_uvector<value_t> concatenate(
+  raft::handle_t const& handle,
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* values,
+  size_t num_arrays)
+{
+  // Total element count across all input views, used to size the output once.
+  size_t total_size{0};
+  for (size_t idx = 0; idx < num_arrays; ++idx) { total_size += values[idx]->size_; }
+
+  rmm::device_uvector<value_t> results(total_size, handle.get_stream());
+
+  // Append each view in order; `out` always points at the next free slot in `results`.
+  value_t* out = results.data();
+  for (size_t idx = 0; idx < num_arrays; ++idx) {
+    size_t const count = values[idx]->size_;
+    raft::copy<value_t>(out, values[idx]->as_type<value_t>(), count, handle.get_stream());
+    out += count;
+  }
+
+  return results;
+}
+
 struct create_graph_functor : public cugraph::c_api::abstract_functor {
   raft::handle_t const& handle_;
   cugraph_graph_properties_t const* properties_;
-  cugraph::c_api::cugraph_type_erased_device_array_view_t const* src_;
-  cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst_;
-  cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights_;
-  cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids_;
-  cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids_;
-  bool_t renumber_;
-  bool_t check_;
+  cugraph_data_type_id_t vertex_type_;
   cugraph_data_type_id_t edge_type_;
+  cugraph_data_type_id_t weight_type_;
+  cugraph_data_type_id_t edge_type_id_type_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* vertices_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* src_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* dst_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* weights_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_ids_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids_;
+  size_t num_arrays_;
+  bool_t renumber_;
+  bool_t drop_self_loops_;
+  bool_t drop_multi_edges_;
+  bool_t do_expensive_check_;
   cugraph::c_api::cugraph_graph_t* result_{};
 
-  create_graph_functor(raft::handle_t const& handle,
-                       cugraph_graph_properties_t const* properties,
-                       cugraph::c_api::cugraph_type_erased_device_array_view_t const* src,
-                       cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst,
-                       cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights,
-                       cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids,
-                       cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids,
-                       bool_t renumber,
-                       bool_t check,
-                       cugraph_data_type_id_t edge_type)
+  create_graph_functor(
+    raft::handle_t const& handle,
+    cugraph_graph_properties_t const* properties,
+    cugraph_data_type_id_t vertex_type,
+    cugraph_data_type_id_t edge_type,
+    cugraph_data_type_id_t weight_type,
+    cugraph_data_type_id_t edge_type_id_type,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* vertices,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* src,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* dst,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* weights,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_ids,
+    cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids,
+    size_t num_arrays,
+    bool_t renumber,
+    bool_t drop_self_loops,
+    bool_t drop_multi_edges,
+    bool_t do_expensive_check)
     : abstract_functor(),
       properties_(properties),
+      vertex_type_(vertex_type),
+      edge_type_(edge_type),
+      weight_type_(weight_type),
+      edge_type_id_type_(edge_type_id_type),
       handle_(handle),
+      vertices_(vertices),
       src_(src),
       dst_(dst),
       weights_(weights),
       edge_ids_(edge_ids),
       edge_type_ids_(edge_type_ids),
+      num_arrays_(num_arrays),
       renumber_(renumber),
-      check_(check),
-      edge_type_(edge_type)
+      drop_self_loops_(drop_self_loops),
+      drop_multi_edges_(drop_multi_edges),
+      do_expensive_check_(do_expensive_check)
   {
   }
 
@@ -96,49 +141,27 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
         edge_type_id_t>>
         new_edge_types{std::nullopt};
 
-      rmm::device_uvector<vertex_t> edgelist_srcs(src_->size_, handle_.get_stream());
-      rmm::device_uvector<vertex_t> edgelist_dsts(dst_->size_, handle_.get_stream());
+      std::optional<rmm::device_uvector<vertex_t>> vertex_list =
+        vertices_ ? std::make_optional(concatenate<vertex_t>(handle_, vertices_, num_arrays_))
+                  : std::nullopt;
 
-      raft::copy<vertex_t>(
-        edgelist_srcs.data(), src_->as_type<vertex_t>(), src_->size_, handle_.get_stream());
-      raft::copy<vertex_t>(
-        edgelist_dsts.data(), dst_->as_type<vertex_t>(), dst_->size_, handle_.get_stream());
+      rmm::device_uvector<vertex_t> edgelist_srcs =
+        concatenate<vertex_t>(handle_, src_, num_arrays_);
+      rmm::device_uvector<vertex_t> edgelist_dsts =
+        concatenate<vertex_t>(handle_, dst_, num_arrays_);
 
       std::optional<rmm::device_uvector<weight_t>> edgelist_weights =
-        weights_
-          ? std::make_optional(rmm::device_uvector<weight_t>(weights_->size_, handle_.get_stream()))
-          : std::nullopt;
-
-      if (edgelist_weights) {
-        raft::copy<weight_t>(edgelist_weights->data(),
-                             weights_->as_type<weight_t>(),
-                             weights_->size_,
-                             handle_.get_stream());
-      }
+        weights_ ? std::make_optional(concatenate<weight_t>(handle_, weights_, num_arrays_))
+                 : std::nullopt;
 
       std::optional<rmm::device_uvector<edge_t>> edgelist_edge_ids =
-        edge_ids_
-          ? std::make_optional(rmm::device_uvector<edge_t>(edge_ids_->size_, handle_.get_stream()))
-          : std::nullopt;
-
-      if (edgelist_edge_ids) {
-        raft::copy<edge_t>(edgelist_edge_ids->data(),
-                           edge_ids_->as_type<edge_t>(),
-                           edge_ids_->size_,
-                           handle_.get_stream());
-      }
+        edge_ids_ ? std::make_optional(concatenate<edge_t>(handle_, edge_ids_, num_arrays_))
+                  : std::nullopt;
 
       std::optional<rmm::device_uvector<edge_type_id_t>> edgelist_edge_types =
-        edge_type_ids_ ? std::make_optional(rmm::device_uvector<edge_type_id_t>(
-                           edge_type_ids_->size_, handle_.get_stream()))
-                       : std::nullopt;
-
-      if (edgelist_edge_types) {
-        raft::copy<edge_type_id_t>(edgelist_edge_types->data(),
-                                   edge_type_ids_->as_type<edge_type_id_t>(),
-                                   edge_type_ids_->size_,
-                                   handle_.get_stream());
-      }
+        edge_type_ids_
+          ? std::make_optional(concatenate<edge_type_id_t>(handle_, edge_type_ids_, num_arrays_))
+          : std::nullopt;
 
       std::tie(store_transposed ? edgelist_dsts : edgelist_srcs,
                store_transposed ? edgelist_srcs : edgelist_dsts,
@@ -153,6 +176,11 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
           std::move(edgelist_edge_ids),
           std::move(edgelist_edge_types));
 
+      if (vertex_list) {
+        vertex_list = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning(
+          handle_, std::move(*vertex_list));
+      }
+
       auto graph = new cugraph::graph_t<vertex_t, edge_t, store_transposed, multi_gpu>(handle_);
 
       rmm::device_uvector<vertex_t>* number_map =
@@ -170,6 +198,28 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
         cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
         edge_type_id_t>(handle_);
 
+      if (drop_self_loops_) {
+        std::tie(
+          edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) =
+          cugraph::remove_self_loops(handle_,
+                                     std::move(edgelist_srcs),
+                                     std::move(edgelist_dsts),
+                                     std::move(edgelist_weights),
+                                     std::move(edgelist_edge_ids),
+                                     std::move(edgelist_edge_types));
+      }
+
+      if (drop_multi_edges_) {
+        std::tie(
+          edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) =
+          cugraph::remove_multi_edges(handle_,
+                                      std::move(edgelist_srcs),
+                                      std::move(edgelist_dsts),
+                                      std::move(edgelist_weights),
+                                      std::move(edgelist_edge_ids),
+                                      std::move(edgelist_edge_types));
+      }
+
       std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) =
         cugraph::create_graph_from_edgelist<vertex_t,
                                             edge_t,
@@ -179,7 +229,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
                                             store_transposed,
                                             multi_gpu>(
           handle_,
-          std::nullopt,
+          std::move(vertex_list),
           std::move(edgelist_srcs),
           std::move(edgelist_dsts),
           std::move(edgelist_weights),
@@ -187,7 +237,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
           std::move(edgelist_edge_types),
           cugraph::graph_properties_t{properties_->is_symmetric, properties_->is_multigraph},
           renumber_,
-          check_);
+          do_expensive_check_);
 
       if (renumber_) {
         *number_map = std::move(new_number_map.value());
@@ -204,90 +254,39 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
       if (new_edge_types) { *edge_types = std::move(new_edge_types.value()); }
 
       // Set up return
-      auto result = new cugraph::c_api::cugraph_graph_t{
-        src_->type_,
-        edge_type_,
-        weights_ ? weights_->type_ : cugraph_data_type_id_t::FLOAT32,
-        edge_type_ids_ ? edge_type_ids_->type_ : cugraph_data_type_id_t::INT32,
-        store_transposed,
-        multi_gpu,
-        graph,
-        number_map,
-        new_edge_weights ? edge_weights : nullptr,
-        new_edge_ids ? edge_ids : nullptr,
-        new_edge_types ? edge_types : nullptr};
+      auto result = new cugraph::c_api::cugraph_graph_t{vertex_type_,
+                                                        edge_type_,
+                                                        weight_type_,
+                                                        edge_type_id_type_,
+                                                        store_transposed,
+                                                        multi_gpu,
+                                                        graph,
+                                                        number_map,
+                                                        new_edge_weights ? edge_weights : nullptr,
+                                                        new_edge_ids ? edge_ids : nullptr,
+                                                        new_edge_types ? edge_types : nullptr};
 
       result_ = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(result);
     }
   }
 };
 
-struct destroy_graph_functor : public cugraph::c_api::abstract_functor {
-  void* graph_;
-  void* number_map_;
-  void* edge_weights_;
-  void* edge_ids_;
-  void* edge_types_;
-
-  destroy_graph_functor(
-    void* graph, void* number_map, void* edge_weights, void* edge_ids, void* edge_types)
-    : abstract_functor(),
-      graph_(graph),
-      number_map_(number_map),
-      edge_weights_(edge_weights),
-      edge_ids_(edge_ids),
-      edge_types_(edge_types)
-  {
-  }
-
-  template <typename vertex_t,
-            typename edge_t,
-            typename weight_t,
-            typename edge_type_id_t,
-            bool store_transposed,
-            bool multi_gpu>
-  void operator()()
-  {
-    auto internal_graph_pointer =
-      reinterpret_cast<cugraph::graph_t<vertex_t, edge_t, store_transposed, multi_gpu>*>(graph_);
-
-    delete internal_graph_pointer;
-
-    auto internal_number_map_pointer =
-      reinterpret_cast<rmm::device_uvector<vertex_t>*>(number_map_);
-
-    delete internal_number_map_pointer;
-
-    auto internal_edge_weight_pointer = reinterpret_cast<
-      cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
-                               weight_t>*>(edge_weights_);
-    if (internal_edge_weight_pointer) { delete internal_edge_weight_pointer; }
-
-    auto internal_edge_id_pointer = reinterpret_cast<
-      cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
-                               edge_t>*>(edge_ids_);
-    if (internal_edge_id_pointer) { delete internal_edge_id_pointer; }
-
-    auto internal_edge_type_pointer = reinterpret_cast<
-      cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
-                               edge_type_id_t>*>(edge_types_);
-    if (internal_edge_type_pointer) { delete internal_edge_type_pointer; }
-  }
-};
-
 }  // namespace
 
-extern "C" cugraph_error_code_t cugraph_mg_graph_create(
-  const cugraph_resource_handle_t* handle,
-  const cugraph_graph_properties_t* properties,
-  const cugraph_type_erased_device_array_view_t* src,
-  const cugraph_type_erased_device_array_view_t* dst,
-  const cugraph_type_erased_device_array_view_t* weights,
-  const cugraph_type_erased_device_array_view_t* edge_ids,
-  const cugraph_type_erased_device_array_view_t* edge_type_ids,
+extern "C" cugraph_error_code_t cugraph_graph_create_mg(
+  cugraph_resource_handle_t const* handle,
+  cugraph_graph_properties_t const* properties,
+  cugraph_type_erased_device_array_view_t const* const* vertices,
+  cugraph_type_erased_device_array_view_t const* const* src,
+  cugraph_type_erased_device_array_view_t const* const* dst,
+  cugraph_type_erased_device_array_view_t const* const* weights,
+  cugraph_type_erased_device_array_view_t const* const* edge_ids,
+  cugraph_type_erased_device_array_view_t const* const* edge_type_ids,
   bool_t store_transposed,
-  size_t num_edges,
-  bool_t check,
+  size_t num_arrays,
+  bool_t drop_self_loops,
+  bool_t drop_multi_edges,
+  bool_t do_expensive_check,
   cugraph_graph_t** graph,
   cugraph_error_t** error)
 {
@@ -298,87 +297,198 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create(
   *error = nullptr;
 
   auto p_handle = reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle);
+  auto p_vertices =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const* const*>(
+      vertices);
   auto p_src =
-    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(src);
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const* const*>(src);
   auto p_dst =
-    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(dst);
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const* const*>(dst);
   auto p_weights =
-    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(weights);
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const* const*>(
+      weights);
   auto p_edge_ids =
-    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(edge_ids);
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const* const*>(
+      edge_ids);
   auto p_edge_type_ids =
-    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(edge_type_ids);
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const* const*>(
+      edge_type_ids);
+
+  size_t local_num_edges{0};
+
+  //
+  // Determine the type of vertex, weight, edge_type_id across
+  // multiple input arrays and across multiple GPUs.  Also compute
+  // the number of edges so we can determine what type to use for
+  // edge_t
+  //
+  cugraph_data_type_id_t vertex_type{cugraph_data_type_id_t::NTYPES};
+  cugraph_data_type_id_t weight_type{cugraph_data_type_id_t::NTYPES};
+
+  for (size_t i = 0; i < num_arrays; ++i) {
+    CAPI_EXPECTS(p_src[i]->size_ == p_dst[i]->size_,
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src size != dst size.",
+                 *error);
+
+    CAPI_EXPECTS(p_src[i]->type_ == p_dst[i]->type_,
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src type != dst type.",
+                 *error);
+
+    CAPI_EXPECTS((p_vertices == nullptr) || (p_src[i]->type_ == p_vertices[i]->type_),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src type != vertices type.",
+                 *error);
+
+    CAPI_EXPECTS((weights == nullptr) || (p_weights[i]->size_ == p_src[i]->size_),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src size != weights size.",
+                 *error);
+
+    local_num_edges += p_src[i]->size_;
+
+    if (vertex_type == cugraph_data_type_id_t::NTYPES) vertex_type = p_src[i]->type_;
+
+    if (weights != nullptr) {
+      if (weight_type == cugraph_data_type_id_t::NTYPES) weight_type = p_weights[i]->type_;
+    }
 
-  CAPI_EXPECTS(p_src->size_ == p_dst->size_,
-               CUGRAPH_INVALID_INPUT,
-               "Invalid input arguments: src size != dst size.",
-               *error);
-  CAPI_EXPECTS(p_src->type_ == p_dst->type_,
+    CAPI_EXPECTS(p_src[i]->type_ == vertex_type,
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: all vertex types must match",
+                 *error);
+
+    CAPI_EXPECTS((weights == nullptr) || (p_weights[i]->type_ == weight_type),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: all weight types must match",
+                 *error);
+  }
+
+  size_t num_edges = cugraph::host_scalar_allreduce(p_handle->handle_->get_comms(),
+                                                    local_num_edges,
+                                                    raft::comms::op_t::SUM,
+                                                    p_handle->handle_->get_stream());
+
+  auto vertex_types = cugraph::host_scalar_allgather(
+    p_handle->handle_->get_comms(), static_cast<int>(vertex_type), p_handle->handle_->get_stream());
+
+  auto weight_types = cugraph::host_scalar_allgather(
+    p_handle->handle_->get_comms(), static_cast<int>(weight_type), p_handle->handle_->get_stream());
+
+  if (vertex_type == cugraph_data_type_id_t::NTYPES) {
+    // Only true if this GPU had no vertex arrays
+    vertex_type = static_cast<cugraph_data_type_id_t>(
+      *std::min_element(vertex_types.begin(), vertex_types.end()));
+  }
+
+  if (weight_type == cugraph_data_type_id_t::NTYPES) {
+    // Only true if this GPU had no weight arrays
+    weight_type = static_cast<cugraph_data_type_id_t>(
+      *std::min_element(weight_types.begin(), weight_types.end()));
+  }
+
+  CAPI_EXPECTS(std::all_of(vertex_types.begin(),
+                           vertex_types.end(),
+                           [vertex_type](auto t) { return vertex_type == static_cast<int>(t); }),
                CUGRAPH_INVALID_INPUT,
-               "Invalid input arguments: src type != dst type.",
+               "different vertex type used on different GPUs",
                *error);
 
-  CAPI_EXPECTS((weights == nullptr) || (p_weights->size_ == p_src->size_),
+  CAPI_EXPECTS(std::all_of(weight_types.begin(),
+                           weight_types.end(),
+                           [weight_type](auto t) { return weight_type == static_cast<int>(t); }),
                CUGRAPH_INVALID_INPUT,
-               "Invalid input arguments: src size != weights size.",
+               "different weight type used on different GPUs",
                *error);
 
   cugraph_data_type_id_t edge_type;
-  cugraph_data_type_id_t weight_type;
 
   if (num_edges < int32_threshold) {
-    edge_type = p_src->type_;
+    edge_type = static_cast<cugraph_data_type_id_t>(vertex_types[0]);
   } else {
     edge_type = cugraph_data_type_id_t::INT64;
   }
 
-  if (weights != nullptr) {
-    weight_type = p_weights->type_;
-  } else {
+  if (weight_type == cugraph_data_type_id_t::NTYPES) {
     weight_type = cugraph_data_type_id_t::FLOAT32;
   }
 
-  CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids->type_ == edge_type),
-               CUGRAPH_INVALID_INPUT,
-               "Invalid input arguments: Edge id type must match edge type",
-               *error);
+  cugraph_data_type_id_t edge_type_id_type{cugraph_data_type_id_t::NTYPES};
 
-  CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids->size_ == p_src->size_),
-               CUGRAPH_INVALID_INPUT,
-               "Invalid input arguments: src size != edge id prop size",
-               *error);
+  for (size_t i = 0; i < num_arrays; ++i) {
+    CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids[i]->type_ == edge_type),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: Edge id type must match edge type",
+                 *error);
 
-  CAPI_EXPECTS((edge_type_ids == nullptr) || (p_edge_type_ids->size_ == p_src->size_),
-               CUGRAPH_INVALID_INPUT,
-               "Invalid input arguments: src size != edge type prop size",
-               *error);
+    CAPI_EXPECTS((edge_ids == nullptr) || (p_edge_ids[i]->size_ == p_src[i]->size_),
+                 CUGRAPH_INVALID_INPUT,
+                 "Invalid input arguments: src size != edge id prop size",
+                 *error);
+
+    if (edge_type_ids != nullptr) {
+      CAPI_EXPECTS(p_edge_type_ids[i]->size_ == p_src[i]->size_,
+                   CUGRAPH_INVALID_INPUT,
+                   "Invalid input arguments: src size != edge type prop size",
+                   *error);
+      // The first array seen fixes the edge type id type; every later array must agree with it.
+      if (edge_type_id_type == cugraph_data_type_id_t::NTYPES)
+        edge_type_id_type = p_edge_type_ids[i]->type_;
+
+      CAPI_EXPECTS(p_edge_type_ids[i]->type_ == edge_type_id_type,
+                   CUGRAPH_INVALID_INPUT,
+                   "Invalid input arguments: all edge type id types must match",
+                   *error);
+    }
+  }
+
+  auto edge_type_id_types = cugraph::host_scalar_allgather(p_handle->handle_->get_comms(),
+                                                           static_cast<int>(edge_type_id_type),
+                                                           p_handle->handle_->get_stream());
+
+  if (edge_type_id_type == cugraph_data_type_id_t::NTYPES) {
+    // Only true if this GPU had no edge_type_id arrays
+    edge_type_id_type = static_cast<cugraph_data_type_id_t>(
+      *std::min_element(edge_type_id_types.begin(), edge_type_id_types.end()));
+  }
+
+  CAPI_EXPECTS(
+    std::all_of(edge_type_id_types.begin(),
+                edge_type_id_types.end(),
+                [edge_type_id_type](auto t) { return edge_type_id_type == static_cast<int>(t); }),
+    CUGRAPH_INVALID_INPUT,
+    "different edge_type_id type used on different GPUs",
+    *error);
 
-  cugraph_data_type_id_t edge_type_id_type;
-  if (edge_type_ids == nullptr) {
+  if (edge_type_id_type == cugraph_data_type_id_t::NTYPES) {
     edge_type_id_type = cugraph_data_type_id_t::INT32;
-  } else {
-    edge_type_id_type = p_edge_type_ids->type_;
   }
 
+  //
+  // Now we know enough to create the graph
+  //
   create_graph_functor functor(*p_handle->handle_,
                                properties,
+                               vertex_type,
+                               edge_type,
+                               weight_type,
+                               edge_type_id_type,
+                               p_vertices,
                                p_src,
                                p_dst,
                                p_weights,
                                p_edge_ids,
                                p_edge_type_ids,
+                               num_arrays,
                                bool_t::TRUE,
-                               check,
-                               edge_type);
+                               drop_self_loops,
+                               drop_multi_edges,
+                               do_expensive_check);
 
   try {
-    cugraph::c_api::vertex_dispatcher(p_src->type_,
-                                      edge_type,
-                                      weight_type,
-                                      edge_type_id_type,
-                                      store_transposed,
-                                      multi_gpu,
-                                      functor);
+    cugraph::c_api::vertex_dispatcher(
+      vertex_type, edge_type, weight_type, edge_type_id_type, store_transposed, multi_gpu, functor);
 
     if (functor.error_code_ != CUGRAPH_SUCCESS) {
       *error = reinterpret_cast<cugraph_error_t*>(functor.error_.release());
@@ -394,25 +504,38 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create(
   return CUGRAPH_SUCCESS;
 }
 
+extern "C" cugraph_error_code_t cugraph_mg_graph_create(
+  cugraph_resource_handle_t const* handle,
+  cugraph_graph_properties_t const* properties,
+  cugraph_type_erased_device_array_view_t const* src,
+  cugraph_type_erased_device_array_view_t const* dst,
+  cugraph_type_erased_device_array_view_t const* weights,
+  cugraph_type_erased_device_array_view_t const* edge_ids,
+  cugraph_type_erased_device_array_view_t const* edge_type_ids,
+  bool_t store_transposed,
+  size_t num_edges,  // not forwarded; the callee sums the edge count from the array sizes
+  bool_t do_expensive_check,
+  cugraph_graph_t** graph,
+  cugraph_error_t** error)
+{
+  // Single-array wrapper. The callee null-checks only the outer pointer of each optional
+  // array list, so an absent array must be forwarded as nullptr, not as the address of NULL.
+  return cugraph_graph_create_mg(handle,
+                                 properties,
+                                 NULL,  // no explicit vertex list
+                                 &src,
+                                 &dst,
+                                 (weights == nullptr) ? nullptr : &weights,
+                                 (edge_ids == nullptr) ? nullptr : &edge_ids,
+                                 (edge_type_ids == nullptr) ? nullptr : &edge_type_ids,
+                                 store_transposed,
+                                 1,             // num_arrays
+                                 FALSE, FALSE,  // drop_self_loops, drop_multi_edges
+                                 do_expensive_check,
+                                 graph, error);
+}
+
 extern "C" void cugraph_mg_graph_free(cugraph_graph_t* ptr_graph)
 {
-  if (ptr_graph != NULL) {
-    auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(ptr_graph);
-
-    destroy_graph_functor functor(internal_pointer->graph_,
-                                  internal_pointer->number_map_,
-                                  internal_pointer->edge_weights_,
-                                  internal_pointer->edge_ids_,
-                                  internal_pointer->edge_types_);
-
-    cugraph::c_api::vertex_dispatcher(internal_pointer->vertex_type_,
-                                      internal_pointer->edge_type_,
-                                      internal_pointer->weight_type_,
-                                      internal_pointer->edge_type_id_type_,
-                                      internal_pointer->store_transposed_,
-                                      internal_pointer->multi_gpu_,
-                                      functor);
-
-    delete internal_pointer;
-  }
+  if (ptr_graph != NULL) { cugraph_graph_free(ptr_graph); }
 }
diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp
index 9536869f123..7793458b53a 100644
--- a/cpp/src/c_api/graph_sg.cpp
+++ b/cpp/src/c_api/graph_sg.cpp
@@ -33,35 +33,44 @@ namespace {
 struct create_graph_functor : public cugraph::c_api::abstract_functor {
   raft::handle_t const& handle_;
   cugraph_graph_properties_t const* properties_;
+  cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices_;
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* src_;
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst_;
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights_;
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids_;
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids_;
   bool_t renumber_;
+  bool_t drop_self_loops_;
+  bool_t drop_multi_edges_;
   bool_t do_expensive_check_;
   cugraph_data_type_id_t edge_type_;
   cugraph::c_api::cugraph_graph_t* result_{};
 
   create_graph_functor(raft::handle_t const& handle,
                        cugraph_graph_properties_t const* properties,
+                       cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices,
                        cugraph::c_api::cugraph_type_erased_device_array_view_t const* src,
                        cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst,
                        cugraph::c_api::cugraph_type_erased_device_array_view_t const* weights,
                        cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_ids,
                        cugraph::c_api::cugraph_type_erased_device_array_view_t const* edge_type_ids,
                        bool_t renumber,
+                       bool_t drop_self_loops,
+                       bool_t drop_multi_edges,
                        bool_t do_expensive_check,
                        cugraph_data_type_id_t edge_type)
     : abstract_functor(),
       properties_(properties),
       handle_(handle),
+      vertices_(vertices),
       src_(src),
       dst_(dst),
       weights_(weights),
       edge_ids_(edge_ids),
       edge_type_ids_(edge_type_ids),
       renumber_(renumber),
+      drop_self_loops_(drop_self_loops),
+      drop_multi_edges_(drop_multi_edges),
       do_expensive_check_(do_expensive_check),
       edge_type_(edge_type)
   {
@@ -99,6 +108,18 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
         edge_type_id_t>>
         new_edge_types{std::nullopt};
 
+      std::optional<rmm::device_uvector<vertex_t>> vertex_list =
+        vertices_ ? std::make_optional(
+                      rmm::device_uvector<vertex_t>(vertices_->size_, handle_.get_stream()))
+                  : std::nullopt;
+
+      if (vertex_list) {
+        raft::copy<vertex_t>(vertex_list->data(),
+                             vertices_->as_type<vertex_t>(),
+                             vertices_->size_,
+                             handle_.get_stream());
+      }
+
       rmm::device_uvector<vertex_t> edgelist_srcs(src_->size_, handle_.get_stream());
       rmm::device_uvector<vertex_t> edgelist_dsts(dst_->size_, handle_.get_stream());
 
@@ -160,6 +181,28 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
         cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
         edge_type_id_t>(handle_);
 
+      if (drop_self_loops_) {
+        std::tie(
+          edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) =
+          cugraph::remove_self_loops(handle_,
+                                     std::move(edgelist_srcs),
+                                     std::move(edgelist_dsts),
+                                     std::move(edgelist_weights),
+                                     std::move(edgelist_edge_ids),
+                                     std::move(edgelist_edge_types));
+      }
+
+      if (drop_multi_edges_) {
+        std::tie(
+          edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) =
+          cugraph::remove_multi_edges(handle_,
+                                      std::move(edgelist_srcs),
+                                      std::move(edgelist_dsts),
+                                      std::move(edgelist_weights),
+                                      std::move(edgelist_edge_ids),
+                                      std::move(edgelist_edge_types));
+      }
+
       std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) =
         cugraph::create_graph_from_edgelist<vertex_t,
                                             edge_t,
@@ -169,7 +212,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
                                             store_transposed,
                                             multi_gpu>(
           handle_,
-          std::nullopt,
+          std::move(vertex_list),
           std::move(edgelist_srcs),
           std::move(edgelist_dsts),
           std::move(edgelist_weights),
@@ -279,6 +322,12 @@ struct create_graph_csr_functor : public cugraph::c_api::abstract_functor {
         edge_type_id_t>>
         new_edge_types{std::nullopt};
 
+      std::optional<rmm::device_uvector<vertex_t>> vertex_list = std::make_optional(
+        rmm::device_uvector<vertex_t>(offsets_->size_ - 1, handle_.get_stream()));
+
+      cugraph::detail::sequence_fill(
+        handle_.get_stream(), vertex_list->data(), vertex_list->size(), vertex_t{0});
+
       rmm::device_uvector<vertex_t> edgelist_srcs(0, handle_.get_stream());
       rmm::device_uvector<vertex_t> edgelist_dsts(indices_->size_, handle_.get_stream());
 
@@ -354,7 +403,7 @@ struct create_graph_csr_functor : public cugraph::c_api::abstract_functor {
                                             store_transposed,
                                             multi_gpu>(
           handle_,
-          std::nullopt,
+          std::move(vertex_list),
           std::move(edgelist_srcs),
           std::move(edgelist_dsts),
           std::move(edgelist_weights),
@@ -452,9 +501,10 @@ struct destroy_graph_functor : public cugraph::c_api::abstract_functor {
 
 }  // namespace
 
-extern "C" cugraph_error_code_t cugraph_sg_graph_create(
+extern "C" cugraph_error_code_t cugraph_graph_create_sg(
   const cugraph_resource_handle_t* handle,
   const cugraph_graph_properties_t* properties,
+  const cugraph_type_erased_device_array_view_t* vertices,
   const cugraph_type_erased_device_array_view_t* src,
   const cugraph_type_erased_device_array_view_t* dst,
   const cugraph_type_erased_device_array_view_t* weights,
@@ -462,6 +512,8 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create(
   const cugraph_type_erased_device_array_view_t* edge_type_ids,
   bool_t store_transposed,
   bool_t renumber,
+  bool_t drop_self_loops,
+  bool_t drop_multi_edges,
   bool_t do_expensive_check,
   cugraph_graph_t** graph,
   cugraph_error_t** error)
@@ -473,6 +525,8 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create(
   *error = nullptr;
 
   auto p_handle = reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle);
+  auto p_vertices =
+    reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(vertices);
   auto p_src =
     reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(src);
   auto p_dst =
@@ -488,6 +542,12 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create(
                CUGRAPH_INVALID_INPUT,
                "Invalid input arguments: src size != dst size.",
                *error);
+
+  CAPI_EXPECTS((p_vertices == nullptr) || (p_src->type_ == p_vertices->type_),
+               CUGRAPH_INVALID_INPUT,
+               "Invalid input arguments: src type != vertices type.",
+               *error);
+
   CAPI_EXPECTS(p_src->type_ == p_dst->type_,
                CUGRAPH_INVALID_INPUT,
                "Invalid input arguments: src type != dst type.",
@@ -533,12 +593,15 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create(
 
   ::create_graph_functor functor(*p_handle->handle_,
                                  properties,
+                                 p_vertices,
                                  p_src,
                                  p_dst,
                                  p_weights,
                                  p_edge_ids,
                                  p_edge_type_ids,
                                  renumber,
+                                 drop_self_loops,
+                                 drop_multi_edges,
                                  do_expensive_check,
                                  edge_type);
 
@@ -565,7 +628,38 @@ extern "C" cugraph_error_code_t cugraph_sg_graph_create(
   return CUGRAPH_SUCCESS;
 }
 
-cugraph_error_code_t cugraph_sg_graph_create_from_csr(
+extern "C" cugraph_error_code_t cugraph_sg_graph_create(
+  const cugraph_resource_handle_t* handle,
+  const cugraph_graph_properties_t* properties,
+  const cugraph_type_erased_device_array_view_t* src,
+  const cugraph_type_erased_device_array_view_t* dst,
+  const cugraph_type_erased_device_array_view_t* weights,
+  const cugraph_type_erased_device_array_view_t* edge_ids,
+  const cugraph_type_erased_device_array_view_t* edge_type_ids,
+  bool_t store_transposed,
+  bool_t renumber,
+  bool_t do_expensive_check,
+  cugraph_graph_t** graph,
+  cugraph_error_t** error)
+{
+  return cugraph_graph_create_sg(handle,  // pure forwarding call; extra arguments are fixed below
+                                 properties,
+                                 NULL,  // vertices: no explicit vertex list
+                                 src,
+                                 dst,
+                                 weights,
+                                 edge_ids,
+                                 edge_type_ids,
+                                 store_transposed,
+                                 renumber,
+                                 FALSE,  // drop_self_loops
+                                 FALSE,  // drop_multi_edges
+                                 do_expensive_check,
+                                 graph,
+                                 error);
+}
+
+cugraph_error_code_t cugraph_graph_create_sg_from_csr(
   const cugraph_resource_handle_t* handle,
   const cugraph_graph_properties_t* properties,
   const cugraph_type_erased_device_array_view_t* offsets,
@@ -662,23 +756,55 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr(
   return CUGRAPH_SUCCESS;
 }
 
-extern "C" void cugraph_sg_graph_free(cugraph_graph_t* ptr_graph)
+cugraph_error_code_t cugraph_sg_graph_create_from_csr(
+  const cugraph_resource_handle_t* handle,
+  const cugraph_graph_properties_t* properties,
+  const cugraph_type_erased_device_array_view_t* offsets,
+  const cugraph_type_erased_device_array_view_t* indices,
+  const cugraph_type_erased_device_array_view_t* weights,
+  const cugraph_type_erased_device_array_view_t* edge_ids,
+  const cugraph_type_erased_device_array_view_t* edge_type_ids,
+  bool_t store_transposed,
+  bool_t renumber,
+  bool_t do_expensive_check,
+  cugraph_graph_t** graph,
+  cugraph_error_t** error)
 {
-  auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(ptr_graph);
-
-  destroy_graph_functor functor(internal_pointer->graph_,
-                                internal_pointer->number_map_,
-                                internal_pointer->edge_weights_,
-                                internal_pointer->edge_ids_,
-                                internal_pointer->edge_types_);
-
-  cugraph::c_api::vertex_dispatcher(internal_pointer->vertex_type_,
-                                    internal_pointer->edge_type_,
-                                    internal_pointer->weight_type_,
-                                    internal_pointer->edge_type_id_type_,
-                                    internal_pointer->store_transposed_,
-                                    internal_pointer->multi_gpu_,
-                                    functor);
-
-  delete internal_pointer;
+  return cugraph_graph_create_sg_from_csr(handle,
+                                          properties,
+                                          offsets,
+                                          indices,
+                                          weights,
+                                          edge_ids,
+                                          edge_type_ids,
+                                          store_transposed,
+                                          renumber,
+                                          do_expensive_check,
+                                          graph,
+                                          error);
 }
+
+// Releases a graph: runs destroy_graph_functor via vertex_dispatcher, then deletes the wrapper.
+extern "C" void cugraph_graph_free(cugraph_graph_t* ptr_graph)
+{
+  if (ptr_graph == NULL) { return; }
+
+  auto graph = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(ptr_graph);
+  destroy_graph_functor functor(graph->graph_,
+                                graph->number_map_,
+                                graph->edge_weights_,
+                                graph->edge_ids_,
+                                graph->edge_types_);
+
+  cugraph::c_api::vertex_dispatcher(graph->vertex_type_,
+                                    graph->edge_type_,
+                                    graph->weight_type_,
+                                    graph->edge_type_id_type_,
+                                    graph->store_transposed_,
+                                    graph->multi_gpu_,
+                                    functor);
+  delete graph;
+}
+
+// Original name of cugraph_graph_free, kept as a forwarding wrapper.
+extern "C" void cugraph_sg_graph_free(cugraph_graph_t* ptr_graph) { cugraph_graph_free(ptr_graph); }
diff --git a/cpp/src/c_api/induced_subgraph.cpp b/cpp/src/c_api/induced_subgraph.cpp
index a1bbcb60825..ac56301e231 100644
--- a/cpp/src/c_api/induced_subgraph.cpp
+++ b/cpp/src/c_api/induced_subgraph.cpp
@@ -147,11 +147,14 @@ struct induced_subgraph_functor : public cugraph::c_api::abstract_functor {
         graph_view.vertex_partition_range_lasts(),
         do_expensive_check_);
 
+      // FIXME: Add support for edge_id and edge_type_id.
       result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{
         new cugraph::c_api::cugraph_type_erased_device_array_t(src, graph_->vertex_type_),
         new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_),
         wgt ? new cugraph::c_api::cugraph_type_erased_device_array_t(*wgt, graph_->weight_type_)
             : NULL,
+        NULL,
+        NULL,
         new cugraph::c_api::cugraph_type_erased_device_array_t(graph_offsets, SIZE_T)};
     }
   }
diff --git a/cpp/src/c_api/induced_subgraph_result.cpp b/cpp/src/c_api/induced_subgraph_result.cpp
index b9ad0e0d66f..5226872d404 100644
--- a/cpp/src/c_api/induced_subgraph_result.cpp
+++ b/cpp/src/c_api/induced_subgraph_result.cpp
@@ -45,6 +45,28 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get
                internal_pointer->wgt_->view());
 }
 
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_ids(
+  cugraph_induced_subgraph_result_t* induced_subgraph)
+{
+  // Edge ids are optional in the result; NULL is returned when none are stored.
+  auto result =
+    reinterpret_cast<cugraph::c_api::cugraph_induced_subgraph_result_t*>(induced_subgraph);
+  if (result->edge_ids_ == nullptr) { return NULL; }
+
+  return reinterpret_cast<cugraph_type_erased_device_array_view_t*>(result->edge_ids_->view());
+}
+
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_edge_type_ids(
+  cugraph_induced_subgraph_result_t* induced_subgraph)
+{
+  // Edge type ids are optional in the result; NULL is returned when none are stored.
+  auto result =
+    reinterpret_cast<cugraph::c_api::cugraph_induced_subgraph_result_t*>(induced_subgraph);
+  if (result->edge_type_ids_ == nullptr) { return NULL; }
+
+  return reinterpret_cast<cugraph_type_erased_device_array_view_t*>(result->edge_type_ids_->view());
+}
+
 extern "C" cugraph_type_erased_device_array_view_t* cugraph_induced_subgraph_get_subgraph_offsets(
   cugraph_induced_subgraph_result_t* induced_subgraph)
 {
@@ -62,6 +84,8 @@ extern "C" void cugraph_induced_subgraph_result_free(
   delete internal_pointer->src_;
   delete internal_pointer->dst_;
   delete internal_pointer->wgt_;
+  delete internal_pointer->edge_ids_;
+  delete internal_pointer->edge_type_ids_;
   delete internal_pointer->subgraph_offsets_;
   delete internal_pointer;
 }
diff --git a/cpp/src/c_api/induced_subgraph_result.hpp b/cpp/src/c_api/induced_subgraph_result.hpp
index acc99b617f4..6f02a699605 100644
--- a/cpp/src/c_api/induced_subgraph_result.hpp
+++ b/cpp/src/c_api/induced_subgraph_result.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,8 @@ struct cugraph_induced_subgraph_result_t {
   cugraph_type_erased_device_array_t* src_{};
   cugraph_type_erased_device_array_t* dst_{};
   cugraph_type_erased_device_array_t* wgt_{};
+  cugraph_type_erased_device_array_t* edge_ids_{};
+  cugraph_type_erased_device_array_t* edge_type_ids_{};
   cugraph_type_erased_device_array_t* subgraph_offsets_{};
 };
 
diff --git a/cpp/src/c_api/legacy_k_truss.cpp b/cpp/src/c_api/legacy_k_truss.cpp
index 90e0894783a..90db9fc133c 100644
--- a/cpp/src/c_api/legacy_k_truss.cpp
+++ b/cpp/src/c_api/legacy_k_truss.cpp
@@ -123,12 +123,15 @@ struct k_truss_functor : public cugraph::c_api::abstract_functor {
       raft::update_device(
         edge_offsets.data(), h_edge_offsets.data(), h_edge_offsets.size(), handle_.get_stream());
 
+      // FIXME: Add support for edge_id and edge_type_id.
       result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{
         new cugraph::c_api::cugraph_type_erased_device_array_t(result_src, graph_->vertex_type_),
         new cugraph::c_api::cugraph_type_erased_device_array_t(result_dst, graph_->vertex_type_),
         wgt ? new cugraph::c_api::cugraph_type_erased_device_array_t(*result_wgt,
                                                                      graph_->weight_type_)
             : NULL,
+        NULL,
+        NULL,
         new cugraph::c_api::cugraph_type_erased_device_array_t(edge_offsets,
                                                                cugraph_data_type_id_t::SIZE_T)};
     }
diff --git a/cpp/src/c_api/resource_handle.cpp b/cpp/src/c_api/resource_handle.cpp
index 767a6f0add6..75b9537ef49 100644
--- a/cpp/src/c_api/resource_handle.cpp
+++ b/cpp/src/c_api/resource_handle.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,3 +41,10 @@ extern "C" int cugraph_resource_handle_get_rank(const cugraph_resource_handle_t*
   auto& comm    = internal->handle_->get_comms();
   return static_cast<int>(comm.get_rank());
 }
+
+extern "C" int cugraph_resource_handle_get_comm_size(const cugraph_resource_handle_t* handle)
+{
+  // Size reported by the communicator of the wrapped handle, narrowed to int for the C API.
+  auto p_handle = reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle);
+  return static_cast<int>(p_handle->handle_->get_comms().get_size());
+}
diff --git a/cpp/src/detail/collect_comm_wrapper.cu b/cpp/src/detail/collect_comm_wrapper.cu
new file mode 100644
index 00000000000..7ce2241c677
--- /dev/null
+++ b/cpp/src/detail/collect_comm_wrapper.cu
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <c_api/capi_helper.hpp>
+#include <structure/detail/structure_utils.cuh>
+
+#include <cugraph/detail/shuffle_wrappers.hpp>
+#include <cugraph/utilities/misc_utils.cuh>
+#include <utilities/collect_comm.cuh>
+
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/sort.h>
+
+namespace cugraph {
+namespace detail {
+
+// cugraph::detail wrapper around cugraph::device_allgatherv: forwards the handle, communicator
+// and input span unchanged and returns the gathered vector.
+template <typename T>
+rmm::device_uvector<T> device_allgatherv(raft::handle_t const& handle,
+                                         raft::comms::comms_t const& comm,
+                                         raft::device_span<T const> d_input)
+{
+  return cugraph::device_allgatherv(handle, comm, d_input);
+}
+
+template rmm::device_uvector<int32_t> device_allgatherv(raft::handle_t const& handle,
+                                                        raft::comms::comms_t const& comm,
+                                                        raft::device_span<int32_t const> d_input);
+
+template rmm::device_uvector<int64_t> device_allgatherv(raft::handle_t const& handle,
+                                                        raft::comms::comms_t const& comm,
+                                                        raft::device_span<int64_t const> d_input);
+
+template rmm::device_uvector<float> device_allgatherv(raft::handle_t const& handle,
+                                                      raft::comms::comms_t const& comm,
+                                                      raft::device_span<float const> d_input);
+
+template rmm::device_uvector<double> device_allgatherv(raft::handle_t const& handle,
+                                                       raft::comms::comms_t const& comm,
+                                                       raft::device_span<double const> d_input);
+
+}  // namespace detail
+}  // namespace cugraph
diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh
index 01fbccaa53e..c49b62e4543 100644
--- a/cpp/src/structure/detail/structure_utils.cuh
+++ b/cpp/src/structure/detail/structure_utils.cuh
@@ -21,6 +21,7 @@
 #include <cugraph/utilities/device_functors.cuh>
 #include <cugraph/utilities/error.hpp>
 #include <cugraph/utilities/misc_utils.cuh>
+#include <cugraph/utilities/packed_bool_utils.hpp>
 
 #include <raft/core/handle.hpp>
 #include <raft/util/device_atomics.cuh>
@@ -33,6 +34,7 @@
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
+#include <thrust/iterator/transform_output_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/remove.h>
 #include <thrust/scan.h>
@@ -496,6 +498,63 @@ void sort_adjacency_list(raft::handle_t const& handle,
   }
 }
 
-}  // namespace detail
+// Evaluates comparison(i) for each entry index i < num_entries and packs the results into
+// packed-bool words.  Returns the number of set bits together with the words.
+template <typename comparison_t>
+std::tuple<size_t, rmm::device_uvector<uint32_t>> mark_entries(raft::handle_t const& handle,
+                                                               size_t num_entries,
+                                                               comparison_t comparison)
+{
+  rmm::device_uvector<uint32_t> bitmap(cugraph::packed_bool_size(num_entries),
+                                       handle.get_stream());
+
+  thrust::tabulate(handle.get_thrust_policy(),
+                   bitmap.begin(),
+                   bitmap.end(),
+                   [comparison, num_entries] __device__(size_t word_idx) {
+                     size_t first_entry = word_idx * cugraph::packed_bools_per_word();
+                     size_t entries_in_word =
+                       (first_entry + cugraph::packed_bools_per_word() < num_entries)
+                         ? cugraph::packed_bools_per_word()
+                         : (num_entries - first_entry);
+                     auto packed = cugraph::packed_bool_empty_mask();
+                     for (size_t bit = 0; bit < entries_in_word; ++bit) {
+                       if (comparison(first_entry + bit)) packed |= cugraph::packed_bool_mask(bit);
+                     }
+                     return packed;
+                   });
+
+  size_t num_marked = thrust::transform_reduce(
+    handle.get_thrust_policy(),
+    bitmap.begin(),
+    bitmap.end(),
+    [] __device__(auto packed) { return __popc(packed); },
+    size_t{0},
+    thrust::plus<size_t>());
+
+  return std::make_tuple(num_marked, std::move(bitmap));
+}
 
+// Returns a vector of size vector.size() - remove_count holding the elements whose flag is clear.
+template <typename T>
+rmm::device_uvector<T> remove_flagged_elements(raft::handle_t const& handle,
+                                               rmm::device_uvector<T>&& vector,
+                                               raft::device_span<uint32_t const> remove_flags,
+                                               size_t remove_count)
+{
+  rmm::device_uvector<T> kept(vector.size() - remove_count, handle.get_stream());
+  auto is_kept = [remove_flags] __device__(size_t i) {
+    return !(remove_flags[cugraph::packed_bool_offset(i)] & cugraph::packed_bool_mask(i));
+  };
+  thrust::copy_if(
+    handle.get_thrust_policy(),
+    thrust::make_counting_iterator(size_t{0}),
+    thrust::make_counting_iterator(vector.size()),
+    thrust::make_transform_output_iterator(kept.begin(), indirection_t<size_t, T*>{vector.data()}),
+    is_kept);
+
+  return kept;
+}
+
+}  // namespace detail
 }  // namespace cugraph
diff --git a/cpp/src/structure/remove_multi_edges.cu b/cpp/src/structure/remove_multi_edges.cu
new file mode 100644
index 00000000000..ba07d068c0e
--- /dev/null
+++ b/cpp/src/structure/remove_multi_edges.cu
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <structure/remove_multi_edges_impl.cuh>
+
+namespace cugraph {
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<float>>,
+                    std::optional<rmm::device_uvector<int32_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<int32_t>&& edgelist_srcs,
+                   rmm::device_uvector<int32_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<float>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<float>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<int32_t>&& edgelist_srcs,
+                   rmm::device_uvector<int32_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<float>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<int64_t>,
+                    std::optional<rmm::device_uvector<float>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<int64_t>&& edgelist_srcs,
+                   rmm::device_uvector<int64_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<float>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<double>>,
+                    std::optional<rmm::device_uvector<int32_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<int32_t>&& edgelist_srcs,
+                   rmm::device_uvector<int32_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<double>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<double>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<int32_t>&& edgelist_srcs,
+                   rmm::device_uvector<int32_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<double>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<int64_t>,
+                    std::optional<rmm::device_uvector<double>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<int64_t>&& edgelist_srcs,
+                   rmm::device_uvector<int64_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<double>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+}  // namespace cugraph
diff --git a/cpp/src/structure/remove_multi_edges_impl.cuh b/cpp/src/structure/remove_multi_edges_impl.cuh
new file mode 100644
index 00000000000..ab6b1fba8eb
--- /dev/null
+++ b/cpp/src/structure/remove_multi_edges_impl.cuh
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <structure/detail/structure_utils.cuh>
+
+#include <cugraph/utilities/dataframe_buffer.hpp>
+// FIXME: mem_frugal_partition should probably not be in shuffle_comm.hpp
+//        It's used here without any notion of shuffling
+#include <cugraph/utilities/shuffle_comm.cuh>
+
+#include <cuco/hash_functions.cuh>
+#include <raft/core/device_span.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/util/device_atomics.cuh>
+#include <rmm/device_uvector.hpp>
+
+#include <thrust/binary_search.h>
+#include <thrust/distance.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/sort.h>
+#include <thrust/tuple.h>
+#include <thrust/unique.h>
+
+#include <algorithm>
+#include <optional>
+
+namespace cugraph {
+
+namespace detail {
+
+// Device functor: MurmurHash3 of a (src, dst) pair, reduced modulo num_groups.
+template <typename vertex_t>
+struct hash_src_dst_pair {
+  int32_t num_groups;  // modulus applied to the hash; has no default, the creator must set it
+
+  int32_t __device__ operator()(thrust::tuple<vertex_t, vertex_t> t) const
+  {
+    // Both vertex ids are hashed together as one contiguous byte range.
+    vertex_t endpoints[2] = {thrust::get<0>(t), thrust::get<1>(t)};
+    cuco::detail::MurmurHash3_32<vertex_t*> hasher{};
+    return hasher.compute_hash(reinterpret_cast<std::byte*>(endpoints), sizeof(endpoints)) %
+           num_groups;
+  }
+};
+
+// Sorts the (src, dst) pairs in place and returns both vectors.  Above mem_frugal_threshold the
+// pairs go through groupby_and_count (two hash groups) and each part is sorted separately.
+template <typename vertex_t>
+std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> group_multi_edges(
+  raft::handle_t const& handle,
+  rmm::device_uvector<vertex_t>&& edgelist_srcs,
+  rmm::device_uvector<vertex_t>&& edgelist_dsts,
+  size_t mem_frugal_threshold)
+{
+  auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin());
+
+  if (edgelist_srcs.size() > mem_frugal_threshold) {
+    // FIXME: Tuning parameter to address high frequency multi-edges.  Defaulting to 2 which makes
+    //        the code easier.  If num_groups > 2 we can evaluate whether to find a good midpoint
+    //        to do 2 sorts, or if we should do more than 2 sorts.
+    const size_t num_groups{2};  // also the modulus handed to the hash functor below
+    auto group_counts =
+      groupby_and_count(pair_first,
+                        pair_first + edgelist_srcs.size(),
+                        hash_src_dst_pair<vertex_t>{static_cast<int32_t>(num_groups)},
+                        num_groups,
+                        mem_frugal_threshold,
+                        handle.get_stream());
+    std::vector<size_t> h_group_counts(group_counts.size());
+    raft::update_host(
+      h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream());
+    handle.sync_stream();  // the copied counts are read on the host right below
+    thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0]);
+    thrust::sort(handle.get_thrust_policy(),
+                 pair_first + h_group_counts[0],
+                 pair_first + edgelist_srcs.size());
+  } else {
+    thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + edgelist_srcs.size());
+  }
+
+  return std::make_tuple(std::move(edgelist_srcs), std::move(edgelist_dsts));
+}
+
+// Sorts the (src, dst) pairs in place, permuting edgelist_values with them (sort_by_key).
+// Above mem_frugal_threshold the data first goes through groupby_and_count (two hash groups).
+template <typename vertex_t, typename edge_value_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           decltype(allocate_dataframe_buffer<edge_value_t>(size_t{0}, rmm::cuda_stream_view{}))>
+group_multi_edges(
+  raft::handle_t const& handle,
+  rmm::device_uvector<vertex_t>&& edgelist_srcs,
+  rmm::device_uvector<vertex_t>&& edgelist_dsts,
+  decltype(allocate_dataframe_buffer<edge_value_t>(0, rmm::cuda_stream_view{}))&& edgelist_values,
+  size_t mem_frugal_threshold)
+{
+  auto pair_first  = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin());
+  auto value_first = get_dataframe_buffer_begin(edgelist_values);
+
+  if (edgelist_srcs.size() > mem_frugal_threshold) {
+    // FIXME: Tuning parameter to address high frequency multi-edges.  Defaulting to 2 which makes
+    //        the code easier.  If num_groups > 2 we can evaluate whether to find a good midpoint
+    //        to do 2 sorts, or if we should do more than 2 sorts.
+    const size_t num_groups{2};  // also the modulus handed to the hash functor below
+    auto group_counts =
+      groupby_and_count(pair_first,
+                        pair_first + edgelist_srcs.size(),
+                        value_first,
+                        hash_src_dst_pair<vertex_t>{static_cast<int32_t>(num_groups)},
+                        num_groups,
+                        mem_frugal_threshold,
+                        handle.get_stream());
+    std::vector<size_t> h_group_counts(group_counts.size());
+    raft::update_host(
+      h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream());
+    handle.sync_stream();  // the copied counts are read on the host right below
+    thrust::sort_by_key(handle.get_thrust_policy(),
+                        pair_first,
+                        pair_first + h_group_counts[0],
+                        get_dataframe_buffer_begin(edgelist_values));
+    thrust::sort_by_key(handle.get_thrust_policy(),
+                        pair_first + h_group_counts[0],
+                        pair_first + edgelist_srcs.size(),
+                        get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]);
+  } else {
+    thrust::sort_by_key(handle.get_thrust_policy(),
+                        pair_first,
+                        pair_first + edgelist_srcs.size(),
+                        get_dataframe_buffer_begin(edgelist_values));
+  }
+
+  return std::make_tuple(
+    std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_values));
+}
+
+}  // namespace detail
+
+template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           std::optional<rmm::device_uvector<weight_t>>,
+           std::optional<rmm::device_uvector<edge_t>>,
+           std::optional<rmm::device_uvector<edge_type_t>>>
+remove_multi_edges(raft::handle_t const& handle,
+                   rmm::device_uvector<vertex_t>&& edgelist_srcs,
+                   rmm::device_uvector<vertex_t>&& edgelist_dsts,
+                   std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
+                   std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
+                   std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types)
+{
+  auto total_global_mem = handle.get_device_properties().totalGlobalMem;
+  size_t element_size   = sizeof(vertex_t) * 2;
+  if (edgelist_weights) { element_size += sizeof(weight_t); }
+  if (edgelist_edge_ids) { element_size += sizeof(edge_t); }
+  if (edgelist_edge_types) { element_size += sizeof(edge_type_t); }
+
+  auto constexpr mem_frugal_ratio =
+    0.25;  // if the expected temporary buffer size exceeds the mem_frugal_ratio of the
+           // total_global_mem, switch to the memory frugal approach
+  auto mem_frugal_threshold =
+    static_cast<size_t>(static_cast<double>(total_global_mem / element_size) * mem_frugal_ratio);
+
+  if (edgelist_weights) {
+    if (edgelist_edge_ids) {
+      if (edgelist_edge_types) {
+        std::forward_as_tuple(edgelist_srcs,
+                              edgelist_dsts,
+                              std::tie(edgelist_weights, edgelist_edge_ids, edgelist_edge_types)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<weight_t, edge_t, edge_type_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights),
+                            std::move(*edgelist_edge_ids),
+                            std::move(*edgelist_edge_types)),
+            mem_frugal_threshold);
+      } else {
+        std::forward_as_tuple(
+          edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<weight_t, edge_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)),
+            mem_frugal_threshold);
+      }
+    } else {
+      if (edgelist_edge_types) {
+        std::forward_as_tuple(
+          edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_types)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<weight_t, edge_type_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)),
+            mem_frugal_threshold);
+      } else {
+        std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<weight_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_weights)),
+            mem_frugal_threshold);
+      }
+    }
+  } else {
+    if (edgelist_edge_ids) {
+      if (edgelist_edge_types) {
+        std::forward_as_tuple(
+          edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids, edgelist_edge_types)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<edge_t, edge_type_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)),
+            mem_frugal_threshold);
+      } else {
+        std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<edge_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_edge_ids)),
+            mem_frugal_threshold);
+      }
+    } else {
+      if (edgelist_edge_types) {
+        std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_types)) =
+          detail::group_multi_edges<vertex_t, thrust::tuple<edge_type_t>>(
+            handle,
+            std::move(edgelist_srcs),
+            std::move(edgelist_dsts),
+            std::make_tuple(std::move(*edgelist_edge_types)),
+            mem_frugal_threshold);
+      } else {
+        std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges(
+          handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold);
+      }
+    }
+  }
+
+  auto [multi_edge_count, multi_edges_to_delete] =
+    detail::mark_entries(handle,
+                         edgelist_srcs.size(),
+                         [d_edgelist_srcs = edgelist_srcs.data(),
+                          d_edgelist_dsts = edgelist_dsts.data()] __device__(auto idx) {
+                           return (idx > 0) && (d_edgelist_srcs[idx - 1] == d_edgelist_srcs[idx]) &&
+                                  (d_edgelist_dsts[idx - 1] == d_edgelist_dsts[idx]);
+                         });
+
+  if (multi_edge_count > 0) {
+    edgelist_srcs = detail::remove_flagged_elements(
+      handle,
+      std::move(edgelist_srcs),
+      raft::device_span<uint32_t const>{multi_edges_to_delete.data(), multi_edges_to_delete.size()},
+      multi_edge_count);
+    edgelist_dsts = detail::remove_flagged_elements(
+      handle,
+      std::move(edgelist_dsts),
+      raft::device_span<uint32_t const>{multi_edges_to_delete.data(), multi_edges_to_delete.size()},
+      multi_edge_count);
+
+    if (edgelist_weights)
+      edgelist_weights = detail::remove_flagged_elements(
+        handle,
+        std::move(*edgelist_weights),
+        raft::device_span<uint32_t const>{multi_edges_to_delete.data(),
+                                          multi_edges_to_delete.size()},
+        multi_edge_count);
+
+    if (edgelist_edge_ids)
+      edgelist_edge_ids = detail::remove_flagged_elements(
+        handle,
+        std::move(*edgelist_edge_ids),
+        raft::device_span<uint32_t const>{multi_edges_to_delete.data(),
+                                          multi_edges_to_delete.size()},
+        multi_edge_count);
+
+    if (edgelist_edge_types)
+      edgelist_edge_types = detail::remove_flagged_elements(
+        handle,
+        std::move(*edgelist_edge_types),
+        raft::device_span<uint32_t const>{multi_edges_to_delete.data(),
+                                          multi_edges_to_delete.size()},
+        multi_edge_count);
+  }
+
+  return std::make_tuple(std::move(edgelist_srcs),
+                         std::move(edgelist_dsts),
+                         std::move(edgelist_weights),
+                         std::move(edgelist_edge_ids),
+                         std::move(edgelist_edge_types));
+}
+
+}  // namespace cugraph
diff --git a/cpp/src/structure/remove_self_loops.cu b/cpp/src/structure/remove_self_loops.cu
new file mode 100644
index 00000000000..8a66c1e05e3
--- /dev/null
+++ b/cpp/src/structure/remove_self_loops.cu
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <structure/remove_self_loops_impl.cuh>
+
+namespace cugraph {
+// Explicit instantiations; each is tagged with its <vertex_t, edge_t, weight_t, edge_type_t>.
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<float>>,
+                    std::optional<rmm::device_uvector<int32_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_self_loops(raft::handle_t const& handle,  // <int32_t, int32_t, float, int32_t>
+                  rmm::device_uvector<int32_t>&& edgelist_srcs,
+                  rmm::device_uvector<int32_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<float>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<float>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_self_loops(raft::handle_t const& handle,  // <int32_t, int64_t, float, int32_t>
+                  rmm::device_uvector<int32_t>&& edgelist_srcs,
+                  rmm::device_uvector<int32_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<float>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<int64_t>,
+                    std::optional<rmm::device_uvector<float>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_self_loops(raft::handle_t const& handle,  // <int64_t, int64_t, float, int32_t>
+                  rmm::device_uvector<int64_t>&& edgelist_srcs,
+                  rmm::device_uvector<int64_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<float>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<double>>,
+                    std::optional<rmm::device_uvector<int32_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_self_loops(raft::handle_t const& handle,  // <int32_t, int32_t, double, int32_t>
+                  rmm::device_uvector<int32_t>&& edgelist_srcs,
+                  rmm::device_uvector<int32_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<double>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    std::optional<rmm::device_uvector<double>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_self_loops(raft::handle_t const& handle,  // <int32_t, int64_t, double, int32_t>
+                  rmm::device_uvector<int32_t>&& edgelist_srcs,
+                  rmm::device_uvector<int32_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<double>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+template std::tuple<rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<int64_t>,
+                    std::optional<rmm::device_uvector<double>>,
+                    std::optional<rmm::device_uvector<int64_t>>,
+                    std::optional<rmm::device_uvector<int32_t>>>
+remove_self_loops(raft::handle_t const& handle,  // <int64_t, int64_t, double, int32_t>
+                  rmm::device_uvector<int64_t>&& edgelist_srcs,
+                  rmm::device_uvector<int64_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<double>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<int64_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<int32_t>>&& edgelist_edge_types);
+
+}  // namespace cugraph
diff --git a/cpp/src/structure/remove_self_loops_impl.cuh b/cpp/src/structure/remove_self_loops_impl.cuh
new file mode 100644
index 00000000000..161ffeae28e
--- /dev/null
+++ b/cpp/src/structure/remove_self_loops_impl.cuh
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <structure/detail/structure_utils.cuh>
+
+#include <raft/core/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <thrust/count.h>
+#include <thrust/distance.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/remove.h>
+#include <thrust/tuple.h>
+
+#include <algorithm>
+#include <optional>
+
+namespace cugraph {
+
+/**
+ * @brief Remove every edge whose source and destination vertex are identical.
+ *
+ * @param handle RAFT handle forwarded to the detail helpers.
+ * @param edgelist_srcs Edge source vertices.
+ * @param edgelist_dsts Edge destination vertices.
+ * @param edgelist_weights Edge weights, if any.
+ * @param edgelist_edge_ids Edge ids, if any.
+ * @param edgelist_edge_types Edge types, if any.
+ * @return The five inputs, in the same order, with every self-loop position removed.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           std::optional<rmm::device_uvector<weight_t>>,
+           std::optional<rmm::device_uvector<edge_t>>,
+           std::optional<rmm::device_uvector<edge_type_t>>>
+remove_self_loops(raft::handle_t const& handle,
+                  rmm::device_uvector<vertex_t>&& edgelist_srcs,
+                  rmm::device_uvector<vertex_t>&& edgelist_dsts,
+                  std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
+                  std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
+                  std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types)
+{
+  auto [num_self_loops, self_loop_flags] = detail::mark_entries(
+    handle,
+    edgelist_srcs.size(),
+    [srcs = edgelist_srcs.data(), dsts = edgelist_dsts.data()] __device__(size_t e) {
+      return srcs[e] == dsts[e];
+    });
+
+  if (num_self_loops > 0) {
+    // One read-only view of the flags is shared by every removal call below.
+    auto const flags =
+      raft::device_span<uint32_t const>{self_loop_flags.data(), self_loop_flags.size()};
+    edgelist_srcs = detail::remove_flagged_elements(
+      handle, std::move(edgelist_srcs), flags, num_self_loops);
+    edgelist_dsts = detail::remove_flagged_elements(
+      handle, std::move(edgelist_dsts), flags, num_self_loops);
+    if (edgelist_weights) {
+      edgelist_weights = detail::remove_flagged_elements(
+        handle, std::move(*edgelist_weights), flags, num_self_loops);
+    }
+    if (edgelist_edge_ids) {
+      edgelist_edge_ids = detail::remove_flagged_elements(
+        handle, std::move(*edgelist_edge_ids), flags, num_self_loops);
+    }
+    if (edgelist_edge_types) {
+      edgelist_edge_types = detail::remove_flagged_elements(
+        handle, std::move(*edgelist_edge_types), flags, num_self_loops);
+    }
+  }
+
+  return std::make_tuple(std::move(edgelist_srcs),
+                         std::move(edgelist_dsts),
+                         std::move(edgelist_weights),
+                         std::move(edgelist_edge_ids),
+                         std::move(edgelist_edge_types));
+}
+
+}  // namespace cugraph
diff --git a/cpp/tests/c_api/create_graph_test.c b/cpp/tests/c_api/create_graph_test.c
index 736db761ebd..11da2eb8589 100644
--- a/cpp/tests/c_api/create_graph_test.c
+++ b/cpp/tests/c_api/create_graph_test.c
@@ -91,8 +91,9 @@ int test_create_sg_graph_simple()
     handle, wgt_view, (byte_t*)h_wgt, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
 
-  ret_code = cugraph_sg_graph_create(handle,
+  ret_code = cugraph_graph_create_sg(handle,
                                      &properties,
+                                     NULL,
                                      src_view,
                                      dst_view,
                                      wgt_view,
@@ -101,11 +102,13 @@ int test_create_sg_graph_simple()
                                      FALSE,
                                      FALSE,
                                      FALSE,
+                                     FALSE,
+                                     FALSE,
                                      &graph,
                                      &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
 
-  cugraph_sg_graph_free(graph);
+  cugraph_graph_free(graph);
 
   cugraph_type_erased_device_array_view_free(wgt_view);
   cugraph_type_erased_device_array_view_free(dst_view);
@@ -300,7 +303,7 @@ int test_create_sg_graph_csr()
   }
 
   cugraph_sample_result_free(result);
-  cugraph_sg_graph_free(graph);
+  cugraph_graph_free(graph);
   cugraph_type_erased_device_array_view_free(wgt_view);
   cugraph_type_erased_device_array_view_free(indices_view);
   cugraph_type_erased_device_array_view_free(offsets_view);
@@ -382,8 +385,9 @@ int test_create_sg_graph_symmetric_error()
     handle, wgt_view, (byte_t*)h_wgt, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
 
-  ret_code = cugraph_sg_graph_create(handle,
+  ret_code = cugraph_graph_create_sg(handle,
                                      &properties,
+                                     NULL,
                                      src_view,
                                      dst_view,
                                      wgt_view,
@@ -391,19 +395,500 @@ int test_create_sg_graph_symmetric_error()
                                      NULL,
                                      FALSE,
                                      FALSE,
+                                     FALSE,
+                                     FALSE,
                                      TRUE,
                                      &graph,
                                      &ret_error);
   TEST_ASSERT(test_ret_value, ret_code != CUGRAPH_SUCCESS, "graph creation succeeded but should have failed.");
 
-  if (ret_code == CUGRAPH_SUCCESS) cugraph_sg_graph_free(graph);
+  if (ret_code == CUGRAPH_SUCCESS) cugraph_graph_free(graph);
+
+  cugraph_type_erased_device_array_view_free(wgt_view);
+  cugraph_type_erased_device_array_view_free(dst_view);
+  cugraph_type_erased_device_array_view_free(src_view);
+  cugraph_type_erased_device_array_free(wgt);
+  cugraph_type_erased_device_array_free(dst);
+  cugraph_type_erased_device_array_free(src);
+
+  cugraph_free_resource_handle(handle);
+  cugraph_error_free(ret_error);
+
+  return test_ret_value;
+}
+
+// Builds a 7-vertex SG graph from an explicit vertex list plus 8 weighted edges, then runs
+// cugraph_pagerank on it and compares every score with h_result (tolerance 0.001).
+int test_create_sg_graph_with_isolated_vertices()
+{
+  int test_ret_value = 0;
+
+  typedef int32_t vertex_t;
+  typedef float weight_t;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error    = NULL;  // never reaches cugraph_error_free() uninitialized
+  size_t num_edges              = 8;
+  size_t num_vertices           = 7;
+  double alpha                  = 0.95;
+  double epsilon                = 0.0001;
+  size_t max_iterations         = 20;
+
+  vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 };  // vertex 6 is in no edge below
+  vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5};
+  weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
+  weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 };
+
+  cugraph_resource_handle_t* handle = NULL;
+  cugraph_graph_t* graph            = NULL;
+  cugraph_graph_properties_t properties;
+
+  properties.is_symmetric  = FALSE;
+  properties.is_multigraph = FALSE;
+
+  data_type_id_t vertex_tid = INT32;
+  data_type_id_t weight_tid = FLOAT32;
+
+  handle = cugraph_create_resource_handle(NULL);
+  TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed.");
+
+  cugraph_type_erased_device_array_t* vertices;
+  cugraph_type_erased_device_array_t* src;
+  cugraph_type_erased_device_array_t* dst;
+  cugraph_type_erased_device_array_t* wgt;
+  cugraph_type_erased_device_array_view_t* vertices_view;
+  cugraph_type_erased_device_array_view_t* src_view;
+  cugraph_type_erased_device_array_view_t* dst_view;
+  cugraph_type_erased_device_array_view_t* wgt_view;
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_vertices, vertex_tid, &vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed.");
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &src, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed.");
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &dst, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed.");
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, weight_tid, &wgt, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed.");
+
+  vertices_view = cugraph_type_erased_device_array_view(vertices);
+  src_view = cugraph_type_erased_device_array_view(src);
+  dst_view = cugraph_type_erased_device_array_view(dst);
+  wgt_view = cugraph_type_erased_device_array_view(wgt);
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, vertices_view, (byte_t*)h_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, src_view, (byte_t*)h_src, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, dst_view, (byte_t*)h_dst, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, wgt_view, (byte_t*)h_wgt, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
+
+  ret_code = cugraph_graph_create_sg(handle,
+                                     &properties,
+                                     vertices_view,  // explicit vertex list (includes vertex 6)
+                                     src_view,
+                                     dst_view,
+                                     wgt_view,
+                                     NULL,
+                                     NULL,
+                                     FALSE,
+                                     FALSE,
+                                     FALSE,
+                                     FALSE,
+                                     FALSE,
+                                     &graph,
+                                     &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+
+  cugraph_centrality_result_t* result = NULL;
+
+  // Validate the graph indirectly: run PageRank, then compare each score by returned vertex id.
+  ret_code = cugraph_pagerank(handle,
+                              graph,
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              alpha,
+                              epsilon,
+                              max_iterations,
+                              FALSE,
+                              &result,
+                              &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  cugraph_type_erased_device_array_view_t* result_vertices;
+  cugraph_type_erased_device_array_view_t* pageranks;
+
+  result_vertices  = cugraph_centrality_result_get_vertices(result);
+  pageranks = cugraph_centrality_result_get_values(result);
+
+  vertex_t h_result_vertices[num_vertices];
+  weight_t h_pageranks[num_vertices];
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_vertices, result_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_pageranks, pageranks, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  for (size_t i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) {
+    TEST_ASSERT(test_ret_value,
+                nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001),
+                "pagerank results don't match");
+  }
+
+  cugraph_centrality_result_free(result);
+  cugraph_graph_free(graph);
+
+  cugraph_type_erased_device_array_view_free(wgt_view);
+  cugraph_type_erased_device_array_view_free(dst_view);
+  cugraph_type_erased_device_array_view_free(src_view);
+  cugraph_type_erased_device_array_view_free(vertices_view);
+  cugraph_type_erased_device_array_free(wgt);
+  cugraph_type_erased_device_array_free(dst);
+  cugraph_type_erased_device_array_free(src);
+  cugraph_type_erased_device_array_free(vertices);
+
+  cugraph_free_resource_handle(handle);
+  cugraph_error_free(ret_error);
+
+  return test_ret_value;
+}
+
+// Builds a 7-vertex SG graph from CSR arrays (vertex 6 isolated) and checks its PageRank scores.
+int test_create_sg_graph_csr_with_isolated()
+{
+  int test_ret_value = 0;
+
+  typedef int32_t vertex_t;
+  typedef int32_t edge_t;
+  typedef float weight_t;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error    = NULL;  // never reaches cugraph_error_free() uninitialized
+  size_t num_edges              = 8;
+  size_t num_vertices           = 7;
+  double alpha                  = 0.95;
+  double epsilon                = 0.0001;
+  size_t max_iterations         = 20;
+
+  // CSR form of the edge list src = {0, 1, 1, 2, 2, 2, 3, 4}, dst = {1, 3, 4, 0, 1, 3, 5, 5}.
+  // h_offsets has num_vertices + 1 entries; rows 5 and 6 are empty (8, 8, 8), and vertex 6 is
+  // also absent from h_indices, so it has no edge at all.
+  // h_offsets holds edge_t values, so its device array is created with edge_tid.
+  edge_t h_offsets[]   = {0, 1, 3, 6, 7, 8, 8, 8};
+  vertex_t h_indices[] = {1, 3, 4, 0, 1, 3, 5, 5};
+  weight_t h_wgt[]     = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
+  weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 };
+
+  cugraph_resource_handle_t* handle = NULL;
+  cugraph_graph_t* graph            = NULL;
+  cugraph_graph_properties_t properties;
+
+  properties.is_symmetric  = FALSE;
+  properties.is_multigraph = FALSE;
+
+  data_type_id_t vertex_tid = INT32;
+  data_type_id_t edge_tid   = INT32;
+  data_type_id_t weight_tid = FLOAT32;
+
+  handle = cugraph_create_resource_handle(NULL);
+  TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed.");
+
+  cugraph_type_erased_device_array_t* offsets;
+  cugraph_type_erased_device_array_t* indices;
+  cugraph_type_erased_device_array_t* wgt;
+  cugraph_type_erased_device_array_view_t* offsets_view;
+  cugraph_type_erased_device_array_view_t* indices_view;
+  cugraph_type_erased_device_array_view_t* wgt_view;
+
+  ret_code = cugraph_type_erased_device_array_create(
+    handle, num_vertices + 1, edge_tid, &offsets, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "offsets create failed.");
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &indices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "indices create failed.");
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, weight_tid, &wgt, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed.");
+
+  offsets_view = cugraph_type_erased_device_array_view(offsets);
+  indices_view = cugraph_type_erased_device_array_view(indices);
+  wgt_view     = cugraph_type_erased_device_array_view(wgt);
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, offsets_view, (byte_t*)h_offsets, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "offsets copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, indices_view, (byte_t*)h_indices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "indices copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, wgt_view, (byte_t*)h_wgt, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
+
+  ret_code = cugraph_sg_graph_create_from_csr(handle,
+                                              &properties,
+                                              offsets_view,
+                                              indices_view,
+                                              wgt_view,
+                                              NULL,
+                                              NULL,
+                                              FALSE,
+                                              FALSE,
+                                              FALSE,
+                                              &graph,
+                                              &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+
+  cugraph_centrality_result_t* result = NULL;
+
+  // Validate the graph indirectly: run PageRank, then compare each score by returned vertex id.
+  ret_code = cugraph_pagerank(handle,
+                              graph,
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              alpha,
+                              epsilon,
+                              max_iterations,
+                              FALSE,
+                              &result,
+                              &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  cugraph_type_erased_device_array_view_t* result_vertices;
+  cugraph_type_erased_device_array_view_t* pageranks;
+
+  result_vertices  = cugraph_centrality_result_get_vertices(result);
+  pageranks = cugraph_centrality_result_get_values(result);
+
+  vertex_t h_result_vertices[num_vertices];
+  weight_t h_pageranks[num_vertices];
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_vertices, result_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_pageranks, pageranks, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  for (size_t i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) {
+    TEST_ASSERT(test_ret_value,
+                nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001),
+                "pagerank results don't match");
+  }
+
+  cugraph_centrality_result_free(result);
+  cugraph_graph_free(graph);
+  cugraph_type_erased_device_array_view_free(wgt_view);
+  cugraph_type_erased_device_array_view_free(indices_view);
+  cugraph_type_erased_device_array_view_free(offsets_view);
+  cugraph_type_erased_device_array_free(wgt);
+  cugraph_type_erased_device_array_free(indices);
+  cugraph_type_erased_device_array_free(offsets);
+
+  cugraph_free_resource_handle(handle);
+  cugraph_error_free(ret_error);
+
+  return test_ret_value;
+}
+
+int test_create_sg_graph_with_isolated_vertices_multi_input()
+{
+  int test_ret_value = 0;
+
+  typedef int32_t vertex_t;
+  typedef int32_t edge_t;
+  typedef float weight_t;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error;
+  size_t num_edges    = 66;
+  size_t num_vertices = 7;
+  double alpha = 0.95;
+  double epsilon = 0.0001;
+  size_t max_iterations = 20;
+
+  vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 };
+  vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5,
+                      0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5,
+                      0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5,
+                      0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5,
+                      0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5,
+                      0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5};
+  vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5,
+                      1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5,
+                      1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5,
+                      1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5,
+                      1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5,
+                      1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5};
+  weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f,
+                      0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f,
+                      0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f,
+                      0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f,
+                      0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f,
+                      0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.7f};
+  weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 };
+
+  cugraph_resource_handle_t* handle = NULL;
+  cugraph_graph_t* graph            = NULL;
+  cugraph_graph_properties_t properties;
+
+  properties.is_symmetric  = FALSE;
+  properties.is_multigraph = FALSE;
+
+  data_type_id_t vertex_tid = INT32;
+  data_type_id_t edge_tid   = INT32;
+  data_type_id_t weight_tid = FLOAT32;
+
+  handle = cugraph_create_resource_handle(NULL);
+  TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed.");
+
+  cugraph_type_erased_device_array_t* vertices;
+  cugraph_type_erased_device_array_t* src;
+  cugraph_type_erased_device_array_t* dst;
+  cugraph_type_erased_device_array_t* wgt;
+  cugraph_type_erased_device_array_view_t* vertices_view;
+  cugraph_type_erased_device_array_view_t* src_view;
+  cugraph_type_erased_device_array_view_t* dst_view;
+  cugraph_type_erased_device_array_view_t* wgt_view;
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_vertices, vertex_tid, &vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed.");
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &src, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed.");
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, vertex_tid, &dst, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed.");
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_edges, weight_tid, &wgt, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed.");
+
+  vertices_view = cugraph_type_erased_device_array_view(vertices);
+  src_view = cugraph_type_erased_device_array_view(src);
+  dst_view = cugraph_type_erased_device_array_view(dst);
+  wgt_view = cugraph_type_erased_device_array_view(wgt);
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, vertices_view, (byte_t*)h_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, src_view, (byte_t*)h_src, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, dst_view, (byte_t*)h_dst, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, wgt_view, (byte_t*)h_wgt, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
+
+  ret_code = cugraph_graph_create_sg(handle,
+                                     &properties,
+                                     vertices_view,
+                                     src_view,
+                                     dst_view,
+                                     wgt_view,
+                                     NULL,
+                                     NULL,
+                                     FALSE,
+                                     FALSE,
+                                     TRUE,
+                                     TRUE,
+                                     FALSE,
+                                     &graph,
+                                     &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+
+  cugraph_centrality_result_t* result = NULL;
+
+  // To verify we will call pagerank
+  ret_code = cugraph_pagerank(handle,
+                              graph,
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              alpha,
+                              epsilon,
+                              max_iterations,
+                              FALSE,
+                              &result,
+                              &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  cugraph_type_erased_device_array_view_t* result_vertices;
+  cugraph_type_erased_device_array_view_t* pageranks;
+
+  result_vertices  = cugraph_centrality_result_get_vertices(result);
+  pageranks = cugraph_centrality_result_get_values(result);
+
+  vertex_t h_result_vertices[num_vertices];
+  weight_t h_pageranks[num_vertices];
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_vertices, result_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_pageranks, pageranks, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) {
+    TEST_ASSERT(test_ret_value,
+                nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001),
+                "pagerank results don't match");
+  }
+
+  cugraph_centrality_result_free(result);
+  cugraph_graph_free(graph);
 
   cugraph_type_erased_device_array_view_free(wgt_view);
   cugraph_type_erased_device_array_view_free(dst_view);
   cugraph_type_erased_device_array_view_free(src_view);
+  cugraph_type_erased_device_array_view_free(vertices_view);
   cugraph_type_erased_device_array_free(wgt);
   cugraph_type_erased_device_array_free(dst);
   cugraph_type_erased_device_array_free(src);
+  cugraph_type_erased_device_array_free(vertices);
 
   cugraph_free_resource_handle(handle);
   cugraph_error_free(ret_error);
@@ -419,5 +904,8 @@ int main(int argc, char** argv)
   result |= RUN_TEST(test_create_sg_graph_simple);
   result |= RUN_TEST(test_create_sg_graph_csr);
   result |= RUN_TEST(test_create_sg_graph_symmetric_error);
+  result |= RUN_TEST(test_create_sg_graph_with_isolated_vertices);
+  result |= RUN_TEST(test_create_sg_graph_csr_with_isolated);
+  result |= RUN_TEST(test_create_sg_graph_with_isolated_vertices_multi_input);
   return result;
 }
diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c
index 4c8f2f22982..fec319d1881 100644
--- a/cpp/tests/c_api/mg_create_graph_test.c
+++ b/cpp/tests/c_api/mg_create_graph_test.c
@@ -17,6 +17,8 @@
 #include "c_test_utils.h"  /* RUN_TEST */
 #include "mg_test_utils.h" /* RUN_TEST */
 
+#include <cugraph_c/algorithms.h>
+
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -41,7 +43,7 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle)
   vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5};
   weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
 
-  cugraph_graph_t* p_graph = NULL;
+  cugraph_graph_t* graph = NULL;
   cugraph_graph_properties_t properties;
 
   properties.is_symmetric  = FALSE;
@@ -94,21 +96,25 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle)
     handle, wgt_view, (byte_t*)h_wgt, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
 
-  ret_code = cugraph_mg_graph_create(handle,
+  ret_code = cugraph_graph_create_mg(handle,
                                      &properties,
-                                     src_view,
-                                     dst_view,
-                                     wgt_view,
+                                     NULL,
+                                     (cugraph_type_erased_device_array_view_t const* const*) &src_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) &dst_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) &wgt_view,
                                      NULL,
                                      NULL,
                                      FALSE,
-                                     num_edges,
+                                     1,
+                                     FALSE,
+                                     FALSE,
                                      TRUE,
-                                     &p_graph,
+                                     &graph,
                                      &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
 
-  cugraph_mg_graph_free(p_graph);
+  cugraph_graph_free(graph);
 
   cugraph_type_erased_device_array_view_free(wgt_view);
   cugraph_type_erased_device_array_view_free(dst_view);
@@ -122,6 +128,382 @@ int test_create_mg_graph_simple(const cugraph_resource_handle_t* handle)
   return test_ret_value;
 }
 
+int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* handle)
+{
+  // Builds an MG graph from num_local_arrays vertex/edge arrays per rank and checks pagerank vs. h_result.
+  int test_ret_value = 0;
+  typedef int32_t vertex_t;
+  typedef int32_t edge_t;
+  typedef float weight_t;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error    = NULL;
+  size_t num_edges    = 8;
+  size_t num_vertices = 7;
+
+  double alpha          = 0.95;
+  double epsilon        = 0.0001;
+  size_t max_iterations = 20;
+
+  vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 };
+  vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5};
+  weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
+  weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 };
+
+  cugraph_graph_t* graph = NULL;
+  cugraph_graph_properties_t properties;
+
+  properties.is_symmetric  = FALSE;
+  properties.is_multigraph = FALSE;
+
+  data_type_id_t vertex_tid = INT32;
+  data_type_id_t edge_tid   = INT32;
+  data_type_id_t weight_tid = FLOAT32;
+
+  const size_t num_local_arrays = 2;
+
+  cugraph_type_erased_device_array_t* vertices[num_local_arrays];
+  cugraph_type_erased_device_array_t* src[num_local_arrays];
+  cugraph_type_erased_device_array_t* dst[num_local_arrays];
+  cugraph_type_erased_device_array_t* wgt[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* vertices_view[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* src_view[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* dst_view[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* wgt_view[num_local_arrays];
+
+  int my_rank = cugraph_resource_handle_get_rank(handle);
+  int comm_size = cugraph_resource_handle_get_comm_size(handle);
+  // Contiguous per-rank chunks of ceil(total / comm_size); starts are clamped to the total so trailing ranks get empty chunks.
+  size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size;
+  size_t local_start_vertex = (my_rank * local_num_vertices < num_vertices) ? (my_rank * local_num_vertices) : num_vertices;
+  size_t local_num_edges = (num_edges + comm_size - 1) / comm_size;
+  size_t local_start_edge = (my_rank * local_num_edges < num_edges) ? (my_rank * local_num_edges) : num_edges;
+  // The unsigned subtractions below cannot wrap because each start is <= its total.
+  local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge);
+  local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex);
+
+  for (size_t i = 0 ; i < num_local_arrays ; ++i) {
+    size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays;
+    size_t vertex_start = (i * vertex_count < local_num_vertices) ? (i * vertex_count) : local_num_vertices;
+    vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start);
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed.");
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+    size_t edge_count = (local_num_edges + num_local_arrays - 1) / num_local_arrays;
+    size_t edge_start = (i * edge_count < local_num_edges) ? (i * edge_count) : local_num_edges;
+    edge_count = (edge_count < (local_num_edges - edge_start)) ? edge_count : (local_num_edges - edge_start);
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, src + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed.");
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, dst + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed.");
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, edge_count, weight_tid, wgt + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed.");
+
+    vertices_view[i] = cugraph_type_erased_device_array_view(vertices[i]);
+    src_view[i] = cugraph_type_erased_device_array_view(src[i]);
+    dst_view[i] = cugraph_type_erased_device_array_view(dst[i]);
+    wgt_view[i] = cugraph_type_erased_device_array_view(wgt[i]);
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, vertices_view[i], (byte_t*)(h_vertices + local_start_vertex + vertex_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices copy_from_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, src_view[i], (byte_t*)(h_src + local_start_edge + edge_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, dst_view[i], (byte_t*)(h_dst + local_start_edge + edge_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, wgt_view[i], (byte_t*)(h_wgt + local_start_edge + edge_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
+  }
+  // Each input is passed as an array of num_local_arrays views rather than a single view.
+  ret_code = cugraph_graph_create_mg(handle,
+                                     &properties,
+                                     (cugraph_type_erased_device_array_view_t const* const*) vertices_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) src_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) dst_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) wgt_view,
+                                     NULL,
+                                     NULL,
+                                     FALSE,
+                                     num_local_arrays,
+                                     FALSE,
+                                     FALSE,
+                                     TRUE,
+                                     &graph,
+                                     &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  //
+  //  Now call pagerank and check results...
+  //
+  cugraph_centrality_result_t* result = NULL;
+
+  ret_code = cugraph_pagerank(handle,
+                              graph,
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              alpha,
+                              epsilon,
+                              max_iterations,
+                              FALSE,
+                              &result,
+                              &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+  // NOTE: Because we get back vertex ids and pageranks, we can simply compare
+  //       the returned values with the expected results for the entire
+  //       graph.  Each GPU will have a subset of the total vertices, so
+  //       they will do a subset of the comparisons.
+  cugraph_type_erased_device_array_view_t* result_vertices;
+  cugraph_type_erased_device_array_view_t* pageranks;
+
+  result_vertices  = cugraph_centrality_result_get_vertices(result);
+  pageranks = cugraph_centrality_result_get_values(result);
+
+  size_t num_local_vertices = cugraph_type_erased_device_array_view_size(result_vertices);
+  // A variable-length array must have a length > 0, so reserve one slot when this rank owns no vertices.
+  vertex_t h_result_vertices[(num_local_vertices > 0) ? num_local_vertices : 1];
+  weight_t h_pageranks[(num_local_vertices > 0) ? num_local_vertices : 1];
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_vertices, result_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_pageranks, pageranks, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  for (size_t i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) {
+    TEST_ASSERT(test_ret_value,
+                nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001),
+                "pagerank results don't match");
+  }
+
+  cugraph_centrality_result_free(result);
+  cugraph_graph_free(graph);
+
+  for (size_t i = 0 ; i < num_local_arrays ; ++i) {
+    cugraph_type_erased_device_array_view_free(wgt_view[i]);
+    cugraph_type_erased_device_array_view_free(dst_view[i]);
+    cugraph_type_erased_device_array_view_free(src_view[i]);
+    cugraph_type_erased_device_array_view_free(vertices_view[i]);
+    cugraph_type_erased_device_array_free(wgt[i]);
+    cugraph_type_erased_device_array_free(dst[i]);
+    cugraph_type_erased_device_array_free(src[i]);
+    cugraph_type_erased_device_array_free(vertices[i]);
+  }
+
+  cugraph_error_free(ret_error);
+
+  return test_ret_value;
+}
+
+int test_create_mg_graph_multiple_edge_lists_multi_edge(const cugraph_resource_handle_t* handle)
+{
+  // Like the multiple-edge-lists test, but the input repeats edge 4->5 and has a 5->5 self loop; pagerank must still match h_result.
+  int test_ret_value = 0;
+  typedef int32_t vertex_t;
+  typedef int32_t edge_t;
+  typedef float weight_t;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error    = NULL;
+  size_t num_edges    = 11;
+  size_t num_vertices = 7;
+
+  double alpha          = 0.95;
+  double epsilon        = 0.0001;
+  size_t max_iterations = 20;
+
+  vertex_t h_vertices[] = { 0, 1, 2, 3, 4, 5, 6 };
+  vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5};
+  vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 5, 5, 5};
+  weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 3.2f, 3.2f, 1.1f};
+  weight_t h_result[] = { 0.0859168, 0.158029, 0.0616337, 0.179675, 0.113239, 0.339873, 0.0616337 };
+
+  cugraph_graph_t* graph = NULL;
+  cugraph_graph_properties_t properties;
+
+  properties.is_symmetric  = FALSE;
+  properties.is_multigraph = FALSE;
+
+  data_type_id_t vertex_tid = INT32;
+  data_type_id_t edge_tid   = INT32;
+  data_type_id_t weight_tid = FLOAT32;
+
+  const size_t num_local_arrays = 2;
+
+  cugraph_type_erased_device_array_t* vertices[num_local_arrays];
+  cugraph_type_erased_device_array_t* src[num_local_arrays];
+  cugraph_type_erased_device_array_t* dst[num_local_arrays];
+  cugraph_type_erased_device_array_t* wgt[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* vertices_view[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* src_view[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* dst_view[num_local_arrays];
+  cugraph_type_erased_device_array_view_t* wgt_view[num_local_arrays];
+
+  int my_rank = cugraph_resource_handle_get_rank(handle);
+  int comm_size = cugraph_resource_handle_get_comm_size(handle);
+  // Contiguous per-rank chunks of ceil(total / comm_size); starts are clamped to the total so trailing ranks get empty chunks.
+  size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size;
+  size_t local_start_vertex = (my_rank * local_num_vertices < num_vertices) ? (my_rank * local_num_vertices) : num_vertices;
+  size_t local_num_edges = (num_edges + comm_size - 1) / comm_size;
+  size_t local_start_edge = (my_rank * local_num_edges < num_edges) ? (my_rank * local_num_edges) : num_edges;
+  // The unsigned subtractions below cannot wrap because each start is <= its total.
+  local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge);
+  local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex);
+
+  for (size_t i = 0 ; i < num_local_arrays ; ++i) {
+    size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays;
+    size_t vertex_start = (i * vertex_count < local_num_vertices) ? (i * vertex_count) : local_num_vertices;
+    vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start);
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed.");
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+    size_t edge_count = (local_num_edges + num_local_arrays - 1) / num_local_arrays;
+    size_t edge_start = (i * edge_count < local_num_edges) ? (i * edge_count) : local_num_edges;
+    edge_count = (edge_count < (local_num_edges - edge_start)) ? edge_count : (local_num_edges - edge_start);
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, src + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src create failed.");
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, edge_count, vertex_tid, dst + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst create failed.");
+
+    ret_code =
+      cugraph_type_erased_device_array_create(handle, edge_count, weight_tid, wgt + i, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt create failed.");
+
+    vertices_view[i] = cugraph_type_erased_device_array_view(vertices[i]);
+    src_view[i] = cugraph_type_erased_device_array_view(src[i]);
+    dst_view[i] = cugraph_type_erased_device_array_view(dst[i]);
+    wgt_view[i] = cugraph_type_erased_device_array_view(wgt[i]);
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, vertices_view[i], (byte_t*)(h_vertices + local_start_vertex + vertex_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices copy_from_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, src_view[i], (byte_t*)(h_src + local_start_edge + edge_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, dst_view[i], (byte_t*)(h_dst + local_start_edge + edge_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "dst copy_from_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+      handle, wgt_view[i], (byte_t*)(h_wgt + local_start_edge + edge_start), &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
+  }
+  // NOTE(review): the three TRUE flags after num_local_arrays are presumably drop_self_loops, drop_multi_edges, do_expensive_check -- confirm in the header.
+  ret_code = cugraph_graph_create_mg(handle,
+                                     &properties,
+                                     (cugraph_type_erased_device_array_view_t const* const*) vertices_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) src_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) dst_view,
+                                     (cugraph_type_erased_device_array_view_t const* const*) wgt_view,
+                                     NULL,
+                                     NULL,
+                                     FALSE,
+                                     num_local_arrays,
+                                     TRUE,
+                                     TRUE,
+                                     TRUE,
+                                     &graph,
+                                     &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  //
+  //  Now call pagerank and check results...
+  //
+  cugraph_centrality_result_t* result = NULL;
+
+  ret_code = cugraph_pagerank(handle,
+                              graph,
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              alpha,
+                              epsilon,
+                              max_iterations,
+                              FALSE,
+                              &result,
+                              &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+  // NOTE: Because we get back vertex ids and pageranks, we can simply compare
+  //       the returned values with the expected results for the entire
+  //       graph.  Each GPU will have a subset of the total vertices, so
+  //       they will do a subset of the comparisons.
+  cugraph_type_erased_device_array_view_t* result_vertices;
+  cugraph_type_erased_device_array_view_t* pageranks;
+
+  result_vertices  = cugraph_centrality_result_get_vertices(result);
+  pageranks = cugraph_centrality_result_get_values(result);
+
+  size_t num_local_vertices = cugraph_type_erased_device_array_view_size(result_vertices);
+  // A variable-length array must have a length > 0, so reserve one slot when this rank owns no vertices.
+  vertex_t h_result_vertices[(num_local_vertices > 0) ? num_local_vertices : 1];
+  weight_t h_pageranks[(num_local_vertices > 0) ? num_local_vertices : 1];
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_vertices, result_vertices, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_pageranks, pageranks, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  for (size_t i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) {
+    TEST_ASSERT(test_ret_value,
+                nearlyEqual(h_result[h_result_vertices[i]], h_pageranks[i], 0.001),
+                "pagerank results don't match");
+  }
+
+  cugraph_centrality_result_free(result);
+  cugraph_graph_free(graph);
+
+  for (size_t i = 0 ; i < num_local_arrays ; ++i) {
+    cugraph_type_erased_device_array_view_free(wgt_view[i]);
+    cugraph_type_erased_device_array_view_free(dst_view[i]);
+    cugraph_type_erased_device_array_view_free(src_view[i]);
+    cugraph_type_erased_device_array_view_free(vertices_view[i]);
+    cugraph_type_erased_device_array_free(wgt[i]);
+    cugraph_type_erased_device_array_free(dst[i]);
+    cugraph_type_erased_device_array_free(src[i]);
+    cugraph_type_erased_device_array_free(vertices[i]);
+  }
+
+  cugraph_error_free(ret_error);
+
+  return test_ret_value;
+}
+
 /******************************************************************************/
 
 int main(int argc, char** argv)
@@ -131,6 +513,8 @@ int main(int argc, char** argv)
 
   int result = 0;
   result |= RUN_MG_TEST(test_create_mg_graph_simple, handle);
+  result |= RUN_MG_TEST(test_create_mg_graph_multiple_edge_lists, handle);
+  result |= RUN_MG_TEST(test_create_mg_graph_multiple_edge_lists_multi_edge, handle);
 
   cugraph_free_resource_handle(handle);
   free_mg_raft_handle(raft_handle);
diff --git a/dependencies.yaml b/dependencies.yaml
index a89acd9288b..2c0918ad117 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -380,6 +380,7 @@ dependencies:
           - &dask rapids-dask-dependency==23.12.*
           - &dask_cuda dask-cuda==23.12.*
           - &numba numba>=0.57
+          - &numpy numpy>=1.21
           - &ucx_py ucx-py==0.35.*
       - output_types: conda
         packages:
@@ -399,7 +400,7 @@ dependencies:
       - output_types: [conda, pyproject]
         packages:
           - networkx>=3.0
-          - &numpy numpy>=1.21
+          - *numpy
   python_run_cugraph_dgl:
     common:
       - output_types: [conda, pyproject]
diff --git a/docs/cugraph/Makefile b/docs/cugraph/Makefile
index 32237aa2cc0..f92d0be6910 100644
--- a/docs/cugraph/Makefile
+++ b/docs/cugraph/Makefile
@@ -2,7 +2,7 @@
 #
 
 # You can set these variables from the command line.
-SPHINXOPTS    =
+SPHINXOPTS    = "-v"
 SPHINXBUILD   = sphinx-build
 SPHINXPROJ    = cugraph
 SOURCEDIR     = source
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/bipartite_operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/bipartite_operators.rst
deleted file mode 100644
index e172309fae2..00000000000
--- a/docs/cugraph/source/api_docs/cugraph-ops/bipartite_operators.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-=============================
-Operators on Bipartite Graphs
-=============================
-
-.. currentmodule:: pylibcugraphops
-
-Update Edges: Concatenation or Sum of Edge and Node Features
-------------------------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.update_efeat_bipartite_e2e_concat_fwd
-   operators.update_efeat_bipartite_e2e_concat_bwd
-
-   operators.update_efeat_bipartite_e2e_sum_fwd
-   operators.update_efeat_bipartite_e2e_sum_bwd
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst
new file mode 100644
index 00000000000..5545bebe975
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst
@@ -0,0 +1,3 @@
+cugraph-ops C++ API Reference
+=============================
+
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/fg_operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/fg_operators.rst
deleted file mode 100644
index 387844f684a..00000000000
--- a/docs/cugraph/source/api_docs/cugraph-ops/fg_operators.rst
+++ /dev/null
@@ -1,83 +0,0 @@
-========================
-Operators on Full Graphs
-========================
-
-.. currentmodule:: pylibcugraphops
-
-Simple Neighborhood Aggregator (SAGEConv)
------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.agg_simple_fg_n2n_fwd
-   operators.agg_simple_fg_n2n_bwd
-   operators.agg_simple_fg_e2n_fwd
-   operators.agg_simple_fg_e2n_bwd
-   operators.agg_simple_fg_n2n_e2n_fwd
-   operators.agg_simple_fg_n2n_e2n_bwd
-
-   operators.agg_concat_fg_n2n_fwd
-   operators.agg_concat_fg_n2n_bwd
-   operators.agg_concat_fg_e2n_fwd
-   operators.agg_concat_fg_e2n_bwd
-   operators.agg_concat_fg_n2n_e2n_fwd
-   operators.agg_concat_fg_n2n_e2n_bwd
-
-Weighted Neighborhood Aggregation
----------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.agg_weighted_fg_n2n_fwd
-   operators.agg_weighted_fg_n2n_bwd
-   operators.agg_concat_weighted_fg_n2n_fwd
-   operators.agg_concat_weighted_fg_n2n_bwd
-
-Heterogenous Aggregator using Basis Decomposition (RGCNConv)
-------------------------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.agg_hg_basis_fg_n2n_post_fwd
-   operators.agg_hg_basis_fg_n2n_post_bwd
-
-Graph Attention (GATConv/GATv2Conv)
------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.mha_gat_fg_n2n_fwd
-   operators.mha_gat_fg_n2n_bwd
-   operators.mha_gat_fg_n2n_efeat_fwd
-   operators.mha_gat_fg_n2n_efeat_bwd
-
-   operators.mha_gat_v2_fg_n2n_fwd
-   operators.mha_gat_v2_fg_n2n_bwd
-   operators.mha_gat_v2_fg_n2n_efeat_fwd
-   operators.mha_gat_v2_fg_n2n_efeat_bwd
-
-Transformer-like Graph Attention (TransformerConv)
---------------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.mha_gat_v2_fg_n2n_fwd
-   operators.mha_gat_v2_fg_n2n_bwd
-   operators.mha_gat_v2_fg_n2n_efeat_fwd
-   operators.mha_gat_v2_fg_n2n_efeat_bwd
-
-Directional Message-Passing (DMPNN)
------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.agg_dmpnn_fg_e2e_fwd
-   operators.agg_dmpnn_fg_e2e_bwd
-
-Graph Pooling
--------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.pool_fg_n2s_fwd
-   operators.pool_fg_n2s_bwd
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/graph_types.rst b/docs/cugraph/source/api_docs/cugraph-ops/graph_types.rst
deleted file mode 100644
index 9289ce53e39..00000000000
--- a/docs/cugraph/source/api_docs/cugraph-ops/graph_types.rst
+++ /dev/null
@@ -1,33 +0,0 @@
-===========
-Graph types
-===========
-
-.. currentmodule:: pylibcugraphops
-
-Message-Flow Graph (MFG)
--------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   make_mfg_csr
-
-Heterogenous MFG
-----------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   make_mfg_csr_hg
-
-"Full" Graph (FG)
------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   make_fg_csr
-
-Heterogenous FG
----------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   make_fg_csr_hg
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/index.rst
index e2338dc1833..fdfd5baab96 100644
--- a/docs/cugraph/source/api_docs/cugraph-ops/index.rst
+++ b/docs/cugraph/source/api_docs/cugraph-ops/index.rst
@@ -1,4 +1,3 @@
-=========================
 cugraph-ops API reference
 =========================
 
@@ -8,11 +7,5 @@ This page provides a list of all publicly accessible modules, methods and classe
     :maxdepth: 2
     :caption: API Documentation
 
-    graph_types
-    pytorch
-    mfg_operators
-    bipartite_operators
-    static_operators
-    fg_operators
-    dimenet
-    pytorch
+    python/index
+    c_cpp/index
\ No newline at end of file
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/mfg_operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/mfg_operators.rst
deleted file mode 100644
index f3dd1faa245..00000000000
--- a/docs/cugraph/source/api_docs/cugraph-ops/mfg_operators.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-================================
-Operators on Message-Flow Graphs
-================================
-
-.. currentmodule:: pylibcugraphops
-
-Simple Neighborhood Aggregator (SAGEConv)
------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.agg_simple_mfg_n2n_fwd
-   operators.agg_simple_mfg_n2n_bwd
-   operators.agg_concat_mfg_n2n_fwd
-   operators.agg_concat_mfg_n2n_bwd
-
-Graph Attention (GATConv)
--------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.mha_gat_mfg_n2n_fwd
-   operators.mha_gat_mfg_n2n_bwd
-
-Heterogenous Aggregator using Basis Decomposition (RGCNConv)
-------------------------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.agg_hg_basis_mfg_n2n_post_fwd
-   operators.agg_hg_basis_mfg_n2n_post_bwd
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/dimenet.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/dimenet.rst
similarity index 89%
rename from docs/cugraph/source/api_docs/cugraph-ops/dimenet.rst
rename to docs/cugraph/source/api_docs/cugraph-ops/python/dimenet.rst
index b709464c7e6..6fadcc57b22 100644
--- a/docs/cugraph/source/api_docs/cugraph-ops/dimenet.rst
+++ b/docs/cugraph/source/api_docs/cugraph-ops/python/dimenet.rst
@@ -7,7 +7,7 @@ Dimenet operators
 Radial Basis Functions
 ----------------------
 .. autosummary::
-   :toctree: ../api/ops/
+   :toctree: ../../api/ops
 
    dimenet.radial_basis_fwd
    dimenet.radial_basis_bwd
@@ -16,7 +16,7 @@ Radial Basis Functions
 Edge-to-Edge Aggregation
 -------------------------
 .. autosummary::
-   :toctree: ../api/ops/
+   :toctree: ../../api/ops
 
    dimenet.agg_edge_to_edge_fwd
    dimenet.agg_edge_to_edge_bwd
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/graph_types.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/graph_types.rst
new file mode 100644
index 00000000000..141d40393a5
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph-ops/python/graph_types.rst
@@ -0,0 +1,34 @@
+===========
+Graph types
+===========
+
+.. currentmodule:: pylibcugraphops
+
+
+CSC Graph
+-----------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   make_csc
+
+Heterogenous CSC Graph
+----------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   make_csc_hg
+
+Bipartite Graph
+-----------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   make_bipartite_csc
+
+Heterogenous Bipartite Graph
+----------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   make_bipartite_csc_hg
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst
new file mode 100644
index 00000000000..082c7741f23
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst
@@ -0,0 +1,13 @@
+cugraph-ops Python API reference
+================================ 
+
+This page provides a list of all publicly accessible modules, methods and classes available through the ``pylibcugraphops.*`` namespace.
+
+.. toctree::
+    :maxdepth: 2
+    :caption: API Documentation
+
+    graph_types
+    operators
+    dimenet
+    pytorch
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst
new file mode 100644
index 00000000000..3e6664b2db5
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph-ops/python/operators.rst
@@ -0,0 +1,93 @@
+=============================
+Operators for Message-Passing
+=============================
+
+.. currentmodule:: pylibcugraphops
+
+Simple Neighborhood Aggregator (SAGEConv)
+-----------------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.agg_simple_n2n_fwd
+   operators.agg_simple_n2n_bwd
+   operators.agg_simple_e2n_fwd
+   operators.agg_simple_e2n_bwd
+   operators.agg_simple_n2n_e2n_fwd
+   operators.agg_simple_n2n_e2n_bwd
+
+   operators.agg_concat_n2n_fwd
+   operators.agg_concat_n2n_bwd
+   operators.agg_concat_e2n_fwd
+   operators.agg_concat_e2n_bwd
+   operators.agg_concat_n2n_e2n_fwd
+   operators.agg_concat_n2n_e2n_bwd
+
+
+Weighted Neighborhood Aggregation
+---------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.agg_weighted_n2n_fwd
+   operators.agg_weighted_n2n_bwd
+   operators.agg_concat_weighted_n2n_fwd
+   operators.agg_concat_weighted_n2n_bwd
+
+Heterogenous Aggregator using Basis Decomposition (RGCNConv)
+------------------------------------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.agg_hg_basis_n2n_post_fwd
+   operators.agg_hg_basis_n2n_post_bwd
+
+Graph Attention (GATConv/GATv2Conv)
+-----------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.mha_gat_n2n_fwd
+   operators.mha_gat_n2n_bwd
+   operators.mha_gat_n2n_efeat_fwd
+   operators.mha_gat_n2n_efeat_bwd
+
+   operators.mha_gat_v2_n2n_fwd
+   operators.mha_gat_v2_n2n_bwd
+   operators.mha_gat_v2_n2n_efeat_fwd
+   operators.mha_gat_v2_n2n_efeat_bwd
+
+Transformer-like Graph Attention (TransformerConv)
+--------------------------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.mha_gat_v2_n2n_fwd
+   operators.mha_gat_v2_n2n_bwd
+   operators.mha_gat_v2_n2n_efeat_fwd
+   operators.mha_gat_v2_n2n_efeat_bwd
+
+Directional Message-Passing (DMPNN)
+-----------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.agg_dmpnn_e2e_fwd
+   operators.agg_dmpnn_e2e_bwd
+
+Update Edges: Concatenation or Sum of Edge and Node Features
+------------------------------------------------------------
+.. autosummary::
+   :toctree: ../../api/ops
+
+   operators.update_efeat_e2e_concat_fwd
+   operators.update_efeat_e2e_concat_bwd
+
+   operators.update_efeat_e2e_sum_fwd
+   operators.update_efeat_e2e_sum_bwd
+
+   operators.update_efeat_e2e_concat_fwd
+   operators.update_efeat_e2e_concat_bwd
+
+   operators.update_efeat_e2e_sum_fwd
+   operators.update_efeat_e2e_sum_bwd
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/pytorch.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/pytorch.rst
similarity index 59%
rename from docs/cugraph/source/api_docs/cugraph-ops/pytorch.rst
rename to docs/cugraph/source/api_docs/cugraph-ops/python/pytorch.rst
index 83800fbc546..d2074df15b0 100644
--- a/docs/cugraph/source/api_docs/cugraph-ops/pytorch.rst
+++ b/docs/cugraph/source/api_docs/cugraph-ops/python/pytorch.rst
@@ -2,35 +2,35 @@
 PyTorch Autograd Wrappers
 ==========================
 
-.. currentmodule:: pylibcugraphops
+.. currentmodule:: pylibcugraphops.pytorch
 
 Simple Neighborhood Aggregator (SAGEConv)
 -----------------------------------------
 .. autosummary::
-   :toctree: ../api/ops/
+   :toctree: ../../api/ops
 
-   pytorch.operators.agg_concat_n2n
+   operators.agg_concat_n2n
 
 Graph Attention (GATConv/GATv2Conv)
 -----------------------------------
 .. autosummary::
-   :toctree: ../api/ops/
+   :toctree: ../../api/ops
 
-   pytorch.operators.mha_gat_n2n
-   pytorch.operators.mha_gat_v2_n2n
+   operators.mha_gat_n2n
+   operators.mha_gat_v2_n2n
 
 Heterogenous Aggregator using Basis Decomposition (RGCNConv)
 ------------------------------------------------------------
 .. autosummary::
-   :toctree: ../api/ops/
+   :toctree: ../../api/ops
 
-   pytorch.operators.agg_hg_basis_n2n_post
+   operators.agg_hg_basis_n2n_post
 
 
 Update Edges: Concatenation or Sum of Edge and Node Features
 ------------------------------------------------------------
 .. autosummary::
-   :toctree: ../api/ops/
+   :toctree: ../../api/ops
 
-   pytorch.operators.update_efeat_bipartite_e2e
-   pytorch.operators.update_efeat_static_e2e
+   operators.update_efeat_e2e
+   operators.update_efeat_e2e
diff --git a/docs/cugraph/source/api_docs/cugraph-ops/static_operators.rst b/docs/cugraph/source/api_docs/cugraph-ops/static_operators.rst
deleted file mode 100644
index f3ecc068f22..00000000000
--- a/docs/cugraph/source/api_docs/cugraph-ops/static_operators.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-==========================
-Operators on Static Graphs
-==========================
-
-.. currentmodule:: pylibcugraphops
-
-Update Edges: Concatenation or Sum of Edge and Node Features
-------------------------------------------------------------
-.. autosummary::
-   :toctree: ../api/ops/
-
-   operators.update_efeat_static_e2e_concat_fwd
-   operators.update_efeat_static_e2e_concat_bwd
-
-   operators.update_efeat_static_e2e_sum_fwd
-   operators.update_efeat_static_e2e_sum_bwd
diff --git a/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst b/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst
index 2cd8969aa66..f7d7f5f2262 100644
--- a/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst
+++ b/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst
@@ -9,6 +9,6 @@ cugraph-pyg
 .. autosummary::
    :toctree: ../api/cugraph-pyg/
 
-   cugraph_pyg.data.cugraph_store.EXPERIMENTAL__CuGraphStore
-   cugraph_pyg.sampler.cugraph_sampler.EXPERIMENTAL__CuGraphSampler
+..   cugraph_pyg.data.cugraph_store.EXPERIMENTAL__CuGraphStore
+..   cugraph_pyg.sampler.cugraph_sampler.EXPERIMENTAL__CuGraphSampler
    
diff --git a/docs/cugraph/source/api_docs/cugraph_c/c_and_cpp.rst b/docs/cugraph/source/api_docs/cugraph_c/c_and_cpp.rst
deleted file mode 100644
index 34b812785d3..00000000000
--- a/docs/cugraph/source/api_docs/cugraph_c/c_and_cpp.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-CuGraph C and C++ API Links
-===========================
-
-coming soon - see https://docs.rapids.ai/api/libcugraph/nightly/
\ No newline at end of file
diff --git a/docs/cugraph/source/api_docs/cugraph_c/centrality.rst b/docs/cugraph/source/api_docs/cugraph_c/centrality.rst
new file mode 100644
index 00000000000..f34e26ad76e
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/centrality.rst
@@ -0,0 +1,51 @@
+Centrality
+==========
+
+PageRank
+--------
+.. doxygenfunction:: cugraph_pagerank
+    :project: libcugraph
+
+.. doxygenfunction:: cugraph_pagerank_allow_nonconvergence
+    :project: libcugraph
+
+Personalized PageRank
+---------------------
+.. doxygenfunction:: cugraph_personalized_pagerank
+    :project: libcugraph
+
+.. doxygenfunction:: cugraph_personalized_pagerank_allow_nonconvergence
+    :project: libcugraph
+
+Eigenvector Centrality
+----------------------
+.. doxygenfunction:: cugraph_eigenvector_centrality
+    :project: libcugraph
+
+Katz Centrality
+---------------
+.. doxygenfunction:: cugraph_katz_centrality
+    :project: libcugraph
+
+Betweenness Centrality
+----------------------
+.. doxygenfunction:: cugraph_betweenness_centrality
+    :project: libcugraph
+
+Edge Betweenness Centrality
+---------------------------
+.. doxygenfunction:: cugraph_edge_betweenness_centrality
+    :project: libcugraph
+
+HITS Centrality
+---------------
+.. doxygenfunction:: cugraph_hits
+    :project: libcugraph
+
+Centrality Support Functions
+----------------------------
+.. doxygengroup:: centrality
+    :project: libcugraph
+    :members:
+    :content-only:
+
diff --git a/docs/cugraph/source/api_docs/cugraph_c/community.rst b/docs/cugraph/source/api_docs/cugraph_c/community.rst
new file mode 100644
index 00000000000..0bbfe365c4d
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/community.rst
@@ -0,0 +1,63 @@
+Community
+=========
+
+.. role:: py(code)
+   :language: c
+   :class: highlight
+
+``#include <cugraph_c/community_algorithms.h>``
+
+Triangle Counting
+-----------------
+.. doxygenfunction:: cugraph_triangle_count
+    :project: libcugraph
+
+Louvain
+-------
+.. doxygenfunction:: cugraph_louvain
+    :project: libcugraph
+
+Leiden
+------
+.. doxygenfunction:: cugraph_leiden
+    :project: libcugraph
+
+ECG
+---
+.. doxygenfunction:: cugraph_ecg
+    :project: libcugraph
+
+Extract Egonet
+--------------
+.. doxygenfunction:: cugraph_extract_ego
+    :project: libcugraph
+
+Balanced Cut
+------------
+.. doxygenfunction:: cugraph_balanced_cut_clustering
+    :project: libcugraph
+
+Spectral Clustering - Modularity Maximization
+---------------------------------------------
+.. doxygenfunction:: cugraph_spectral_modularity_maximization
+    :project: libcugraph
+
+.. doxygenfunction:: cugraph_analyze_clustering_modularity
+    :project: libcugraph
+
+Spectral Clustering - Edge Cut
+-------------------------------
+.. doxygenfunction:: cugraph_analyze_clustering_edge_cut
+    :project: libcugraph
+
+.. doxygenfunction:: cugraph_analyze_clustering_ratio_cut
+    :project: libcugraph
+
+
+Community Support Functions
+---------------------------
+.. doxygengroup:: community
+    :project: libcugraph
+    :members:
+    :content-only:
+
diff --git a/docs/cugraph/source/api_docs/cugraph_c/core.rst b/docs/cugraph/source/api_docs/cugraph_c/core.rst
new file mode 100644
index 00000000000..34456c65e43
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/core.rst
@@ -0,0 +1,21 @@
+Core
+====
+
+
+Core Number
+-----------
+.. doxygenfunction:: cugraph_core_number
+    :project: libcugraph
+
+K-Core
+------
+.. doxygenfunction:: cugraph_k_core
+    :project: libcugraph
+
+
+Core Support Functions
+----------------------
+.. doxygengroup:: core
+    :project: libcugraph
+    :members:
+    :content-only:
diff --git a/docs/cugraph/source/api_docs/cugraph_c/index.rst b/docs/cugraph/source/api_docs/cugraph_c/index.rst
new file mode 100644
index 00000000000..3dd37dbc374
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/index.rst
@@ -0,0 +1,16 @@
+===========================
+cuGraph C API documentation
+===========================
+
+
+.. toctree::
+    :maxdepth: 3
+    :caption: API Documentation
+
+    centrality.rst
+    community.rst
+    core.rst
+    labeling.rst
+    sampling.rst
+    similarity.rst
+    traversal.rst
diff --git a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst
new file mode 100644
index 00000000000..af105ee8fc9
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst
@@ -0,0 +1,20 @@
+Components
+==========
+
+
+Weakly Connected Components
+---------------------------
+.. doxygenfunction:: cugraph_weakly_connected_components
+    :project: libcugraph
+
+Strongly Connected Components
+-----------------------------
+.. doxygenfunction:: cugraph_strongly_connected_components
+    :project: libcugraph
+
+Support
+-------
+.. doxygengroup:: labeling
+    :project: libcugraph
+    :members:
+    :content-only:
\ No newline at end of file
diff --git a/docs/cugraph/source/api_docs/cugraph_c/sampling.rst b/docs/cugraph/source/api_docs/cugraph_c/sampling.rst
new file mode 100644
index 00000000000..21b837daf93
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/sampling.rst
@@ -0,0 +1,37 @@
+Sampling
+========
+
+Uniform Random Walks
+--------------------
+.. doxygenfunction:: cugraph_uniform_random_walks
+    :project: libcugraph
+
+Biased Random Walks
+--------------------
+.. doxygenfunction:: cugraph_biased_random_walks
+    :project: libcugraph
+
+Random Walks via Node2Vec
+-------------------------
+.. doxygenfunction:: cugraph_node2vec_random_walks
+    :project: libcugraph
+
+Node2Vec
+--------
+.. doxygenfunction:: cugraph_node2vec
+    :project: libcugraph
+
+Uniform Neighborhood Sampling
+-----------------------------
+.. doxygenfunction:: cugraph_uniform_neighbor_sample_with_edge_properties
+    :project: libcugraph
+
+.. doxygenfunction:: cugraph_uniform_neighbor_sample
+    :project: libcugraph
+
+Support
+-------
+.. doxygengroup:: samplingC
+     :project: libcugraph
+     :members:
+     :content-only:
diff --git a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst
new file mode 100644
index 00000000000..fba07ad206c
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst
@@ -0,0 +1,25 @@
+Similarity
+==========
+
+
+Jaccard
+-------
+.. doxygenfunction:: cugraph_jaccard_coefficients
+    :project: libcugraph
+
+Sorensen
+--------
+.. doxygenfunction:: cugraph_sorensen_coefficients
+    :project: libcugraph
+
+Overlap
+-------
+.. doxygenfunction:: cugraph_overlap_coefficients
+    :project: libcugraph
+
+Support
+-------
+.. doxygengroup:: similarity
+     :project: libcugraph
+     :members:
+     :content-only:
\ No newline at end of file
diff --git a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst
new file mode 100644
index 00000000000..c90760e9e79
--- /dev/null
+++ b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst
@@ -0,0 +1,30 @@
+Traversal
+==========
+
+
+Breadth First Search (BFS)
+--------------------------
+.. doxygenfunction:: cugraph_bfs
+    :project: libcugraph
+
+Single-Source Shortest-Path (SSSP)
+----------------------------------
+.. doxygenfunction:: cugraph_sssp
+    :project: libcugraph
+
+Path Extraction
+---------------
+.. doxygenfunction:: cugraph_extract_paths
+    :project: libcugraph
+
+Extract Max Path Length
+-----------------------
+.. doxygenfunction:: cugraph_extract_paths_result_get_max_path_length
+    :project: libcugraph
+
+Support
+-------
+.. doxygengroup:: traversal
+     :project: libcugraph
+     :members:
+     :content-only:
\ No newline at end of file
diff --git a/docs/cugraph/source/api_docs/index.rst b/docs/cugraph/source/api_docs/index.rst
index 45f7210f5a2..74ca98bb98d 100644
--- a/docs/cugraph/source/api_docs/index.rst
+++ b/docs/cugraph/source/api_docs/index.rst
@@ -1,16 +1,39 @@
-Python API reference
-====================
+API Reference
+=============
 
 This page provides a list of all publicly accessible Python modules with in the Graph collection
 
+Core Graph API Documentation
+----------------------------
+
 .. toctree::
-    :maxdepth: 2
-    :caption: Python API Documentation
+    :maxdepth: 3
+    :caption: Core Graph API Documentation
 
     cugraph/index.rst
     plc/pylibcugraph.rst
+    cugraph_c/index.rst
+    cugraph_cpp/index.rst
+
+Graph Neural Networks API Documentation
+---------------------------------------
+
+.. toctree::
+    :maxdepth: 3
+    :caption: Graph Neural Networks API Documentation
+
     cugraph-dgl/cugraph_dgl.rst
     cugraph-pyg/cugraph_pyg.rst
-    service/index.rst
     cugraph-ops/index.rst
+    wholegraph/index.rst
+
+Additional Graph Packages API Documentation
+-------------------------------------------
+
+.. toctree::
+    :maxdepth: 3
+    :caption: Additional Graph Packages API Documentation
+
+    service/index.rst
+
 
diff --git a/docs/cugraph/source/api_docs/service/cugraph_service_client.rst b/docs/cugraph/source/api_docs/service/cugraph_service_client.rst
index 383b31d269a..7e344d326f7 100644
--- a/docs/cugraph/source/api_docs/service/cugraph_service_client.rst
+++ b/docs/cugraph/source/api_docs/service/cugraph_service_client.rst
@@ -9,7 +9,7 @@ cugraph-service
 .. autosummary::
    :toctree: ../api/service/
 
-   cugraph_service_client.client.RunAsyncioThread
+..   cugraph_service_client.client.RunAsyncioThread
    cugraph_service_client.client.run_async
    cugraph_service_client.client.DeviceArrayAllocator
    cugraph_service_client.client.CugraphServiceClient
diff --git a/docs/cugraph/source/api_docs/service/cugraph_service_server.rst b/docs/cugraph/source/api_docs/service/cugraph_service_server.rst
index a7e8b547573..09ca8360b6c 100644
--- a/docs/cugraph/source/api_docs/service/cugraph_service_server.rst
+++ b/docs/cugraph/source/api_docs/service/cugraph_service_server.rst
@@ -9,6 +9,6 @@ cugraph-service
 .. autosummary::
    :toctree: ../api/service/
 
-   cugraph_service_server.cugraph_handler.call_algo
+..   cugraph_service_server.cugraph_handler.call_algo
    cugraph_service_server.cugraph_handler.ExtensionServerFacade
    cugraph_service_server.cugraph_handler.CugraphHandler
diff --git a/docs/cugraph/source/api_docs/wholegraph/index.rst b/docs/cugraph/source/api_docs/wholegraph/index.rst
new file mode 100644
index 00000000000..80e231d4610
--- /dev/null
+++ b/docs/cugraph/source/api_docs/wholegraph/index.rst
@@ -0,0 +1,11 @@
+WholeGraph API reference
+========================
+
+This page provides the WholeGraph API reference.
+
+.. toctree::
+    :maxdepth: 2
+    :caption: WholeGraph API Documentation
+
+    libwholegraph/index.rst
+    pylibwholegraph/index.rst
diff --git a/docs/cugraph/source/api_docs/wholegraph/libwholegraph/index.rst b/docs/cugraph/source/api_docs/wholegraph/libwholegraph/index.rst
new file mode 100644
index 00000000000..4ef68abef2d
--- /dev/null
+++ b/docs/cugraph/source/api_docs/wholegraph/libwholegraph/index.rst
@@ -0,0 +1,228 @@
+=====================
+libwholegraph API doc
+=====================
+
+Doxygen WholeGraph C API documentation
+--------------------------------------
+For doxygen documentation, please refer to `Doxygen Documentation <../../doxygen_docs/libwholegraph/html/index.html>`_
+
+WholeGraph C API documentation
+------------------------------
+
+Library Level APIs
+++++++++++++++++++
+
+.. doxygenenum:: wholememory_error_code_t
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_init
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_finalize
+    :project: libwholegraph
+.. doxygenfunction:: fork_get_device_count
+    :project: libwholegraph
+
+WholeMemory Communicator APIs
++++++++++++++++++++++++++++++
+
+.. doxygentypedef:: wholememory_comm_t
+    :project: libwholegraph
+.. doxygenstruct:: wholememory_unique_id_t
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_unique_id
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_communicator
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_destroy_communicator
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_communicator_get_rank
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_communicator_get_size
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_communicator_barrier
+    :project: libwholegraph
+
+WholeMemoryHandle APIs
+++++++++++++++++++++++
+
+.. doxygenenum:: wholememory_memory_type_t
+    :project: libwholegraph
+.. doxygenenum:: wholememory_memory_location_t
+    :project: libwholegraph
+.. doxygentypedef:: wholememory_handle_t
+    :project: libwholegraph
+.. doxygenstruct:: wholememory_gref_t
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_malloc
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_free
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_communicator
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_type
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_location
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_total_size
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_data_granularity
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_local_memory
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_rank_memory
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_global_pointer
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_global_reference
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_determine_partition_plan
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_determine_entry_partition_plan
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_partition_plan
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_load_from_file
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_store_to_file
+    :project: libwholegraph
+
+WholeMemoryTensor APIs
+++++++++++++++++++++++
+
+.. doxygenenum:: wholememory_dtype_t
+    :project: libwholegraph
+.. doxygenstruct:: wholememory_array_description_t
+    :project: libwholegraph
+.. doxygenstruct:: wholememory_matrix_description_t
+    :project: libwholegraph
+.. doxygenstruct:: wholememory_tensor_description_t
+    :project: libwholegraph
+.. doxygentypedef:: wholememory_tensor_t
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_dtype_get_element_size
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_dtype_is_floating_number
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_dtype_is_integer_number
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_array_desc
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_matrix_desc
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_initialize_tensor_desc
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_copy_array_desc_to_matrix
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_copy_array_desc_to_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_copy_matrix_desc_to_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_convert_tensor_desc_to_array
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_convert_tensor_desc_to_matrix
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_element_count_from_array
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_size_from_array
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_element_count_from_matrix
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_size_from_matrix
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_element_count_from_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_get_memory_size_from_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_unsqueeze_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_destroy_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_make_tensor_from_pointer
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_make_tensor_from_handle
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_has_handle
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_memory_handle
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_tensor_description
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_global_reference
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_map_local_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_data_pointer
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_entry_per_partition
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_subtensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_tensor_get_root
+    :project: libwholegraph
+
+Ops on WholeMemory Tensors
+++++++++++++++++++++++++++
+
+.. doxygenfunction:: wholememory_gather
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_scatter
+    :project: libwholegraph
+
+WholeTensorEmbedding APIs
++++++++++++++++++++++++++
+
+.. doxygentypedef:: wholememory_embedding_cache_policy_t
+    :project: libwholegraph
+.. doxygentypedef:: wholememory_embedding_optimizer_t
+    :project: libwholegraph
+.. doxygentypedef:: wholememory_embedding_t
+    :project: libwholegraph
+.. doxygenenum:: wholememory_access_type_t
+    :project: libwholegraph
+.. doxygenenum:: wholememory_optimizer_type_t
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_embedding_optimizer
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_optimizer_set_parameter
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_destroy_embedding_optimizer
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_embedding_cache_policy
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_destroy_embedding_cache_policy
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_create_embedding
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_destroy_embedding
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_get_embedding_tensor
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_gather
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_gather_gradient_apply
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_get_optimizer_state_names
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_get_optimizer_state
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_writeback_cache
+    :project: libwholegraph
+.. doxygenfunction:: wholememory_embedding_drop_all_cache
+    :project: libwholegraph
+
+Ops on graphs stored in WholeMemory
++++++++++++++++++++++++++++++++++++
+
+.. doxygenfunction:: wholegraph_csr_unweighted_sample_without_replacement
+    :project: libwholegraph
+.. doxygenfunction:: wholegraph_csr_weighted_sample_without_replacement
+    :project: libwholegraph
+
+Miscellaneous Ops for graph
++++++++++++++++++++++++++++
+
+.. doxygenfunction:: graph_append_unique
+    :project: libwholegraph
+.. doxygenfunction:: csr_add_self_loop
+    :project: libwholegraph
diff --git a/docs/cugraph/source/api_docs/wholegraph/pylibwholegraph/index.rst b/docs/cugraph/source/api_docs/wholegraph/pylibwholegraph/index.rst
new file mode 100644
index 00000000000..67aab00acef
--- /dev/null
+++ b/docs/cugraph/source/api_docs/wholegraph/pylibwholegraph/index.rst
@@ -0,0 +1,38 @@
+=======================
+pylibwholegraph API doc
+=======================
+
+.. currentmodule:: pylibwholegraph
+
+APIs
+----
+.. autosummary::
+    :toctree: ../../api/wg
+
+    torch.initialize.init_torch_env
+    torch.initialize.init_torch_env_and_create_wm_comm
+    torch.initialize.finalize
+    torch.comm.WholeMemoryCommunicator
+    torch.comm.set_world_info
+    torch.comm.create_group_communicator
+    torch.comm.destroy_communicator
+    torch.comm.get_global_communicator
+    torch.comm.get_local_node_communicator
+    torch.comm.get_local_device_communicator
+    torch.tensor.WholeMemoryTensor
+    torch.tensor.create_wholememory_tensor
+    torch.tensor.create_wholememory_tensor_from_filelist
+    torch.tensor.destroy_wholememory_tensor
+    torch.embedding.WholeMemoryOptimizer
+    torch.embedding.create_wholememory_optimizer
+    torch.embedding.destroy_wholememory_optimizer
+    torch.embedding.WholeMemoryCachePolicy
+    torch.embedding.create_wholememory_cache_policy
+    torch.embedding.create_builtin_cache_policy
+    torch.embedding.destroy_wholememory_cache_policy
+    torch.embedding.WholeMemoryEmbedding
+    torch.embedding.create_embedding
+    torch.embedding.create_embedding_from_filelist
+    torch.embedding.destroy_embedding
+    torch.embedding.WholeMemoryEmbeddingModule
+    torch.graph_structure.GraphStructure
diff --git a/docs/cugraph/source/basics/cugraph_intro.md b/docs/cugraph/source/basics/cugraph_intro.md
index 0684129503f..10d14f8a0d7 100644
--- a/docs/cugraph/source/basics/cugraph_intro.md
+++ b/docs/cugraph/source/basics/cugraph_intro.md
@@ -21,7 +21,7 @@ call graph algorithms using data stored in a GPU DataFrame, NetworkX Graphs, or
 CuPy or SciPy sparse Matrix.  
 
 
-# Vision
+## Vision
 The vision of RAPIDS cuGraph is to ___make graph analysis ubiquitous to the 
 point that users just think in terms of analysis and not technologies or 
 frameworks___. This is a goal that many of us on the cuGraph team have been 
@@ -49,7 +49,7 @@ RAPIDS and DASK allows cuGraph to scale to multiple GPUs to support
 multi-billion edge graphs.
 
 
-# Terminology
+## Terminology
 
 cuGraph is a collection of GPU accelerated graph algorithms and graph utility
 functions. The application of graph analysis covers a lot of areas.
@@ -67,8 +67,7 @@ documentation we will mostly use the terms __Node__ and __Edge__ to better
 match NetworkX preferred term use, as well as other Python-based tools.  At
 the CUDA/C layer, we favor the mathematical terms of __Vertex__ and __Edge__.  
 
-# Roadmap
-GitHub does not provide a robust project management interface, and so a roadmap turns into simply a projection of when work will be completed and not a complete picture of everything that needs to be done.  To capture the work that requires multiple steps, issues are labels as “EPIC” and include multiple subtasks that could span multiple releases.   The EPIC will be in the release where work in expected to be completed. A better roadmap is being worked an image of the roadmap will be posted when ready.
 
- * GitHub Project Board:  https://github.com/rapidsai/cugraph/projects/28
+
+
  
\ No newline at end of file
diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py
index 470086b4faa..3f7ef7deb03 100644
--- a/docs/cugraph/source/conf.py
+++ b/docs/cugraph/source/conf.py
@@ -181,11 +181,10 @@
 #  dir menu entry, description, category)
 texinfo_documents = [
     (master_doc, 'cugraph', 'cugraph Documentation',
-     author, 'cugraph', 'One line description of project.',
+     author, 'cugraph', 'GPU-accelerated graph analysis.',
      'Miscellaneous'),
 ]
 
-
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {'https://docs.python.org/': None}
 
@@ -209,7 +208,9 @@ def setup(app):
 )
 
 breathe_projects = {
+    'libcugraph': os.environ['XML_DIR_LIBCUGRAPH'],
     'libcugraphops': os.environ['XML_DIR_LIBCUGRAPHOPS'],
     'libwholegraph': os.environ['XML_DIR_LIBWHOLEGRAPH']
 }
+
 breathe_default_project = "libcugraph"
diff --git a/docs/cugraph/source/graph_support/algorithms.md b/docs/cugraph/source/graph_support/algorithms.md
index f6cb7c0d8b1..a1b80e92751 100644
--- a/docs/cugraph/source/graph_support/algorithms.md
+++ b/docs/cugraph/source/graph_support/algorithms.md
@@ -22,7 +22,7 @@ Note: Multi-GPU, or MG, includes support for Multi-Node Multi-GPU (also called M
 
 | Category          | Notebooks                          | Scale               | Notes                                                           |
 | ----------------- | ---------------------------------- | ------------------- | --------------------------------------------------------------- |
-| [Centrality](./algorithms/Centrality.md)        | [Centrality](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/centrality/Centrality.ipynb)          |           |        |
+| [Centrality](./algorithms/Centrality.html)        | [Centrality](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/centrality/Centrality.ipynb)          |           |        |
 |                   | [Katz](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/centrality/Katz.ipynb)                                    | __Multi-GPU__  |                  |
 |                   | [Betweenness Centrality](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/centrality/Betweenness.ipynb)           | __Multi-GPU__  | MG as of 23.06   |
 |                   | [Edge Betweenness Centrality](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/centrality/Betweenness.ipynb)      | __Multi-GPU__  | MG as of 23.08   |
@@ -31,12 +31,12 @@ Note: Multi-GPU, or MG, includes support for Multi-Node Multi-GPU (also called M
 | Community         |                                    |                     |                                                                 |
 |                   | [Leiden](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/Louvain.ipynb)                                | __Multi-GPU__  | MG as of 23.06  |
 |                   | [Louvain](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/Louvain.ipynb)                               | __Multi-GPU__  |                 |
-|                   | [Ensemble Clustering for Graphs](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/ECG.ipynb)            | Single-GPU     |  MG planned for 23.10 |
+|                   | [Ensemble Clustering for Graphs](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/ECG.ipynb)            | Single-GPU     |  MG planned for 24.02 |
 |                   | [Spectral-Clustering - Balanced Cut](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/Spectral-Clustering.ipynb) | Single-GPU     |      |
 |                   | [Spectral-Clustering - Modularity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/Spectral-Clustering.ipynb)   | Single-GPU          |        |
 |                   | [Subgraph Extraction](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/Subgraph-Extraction.ipyn)                | Single-GPU          |        |
 |                   | [Triangle Counting](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/Triangle-Counting.ipynb)           | __Multi-GPU__ |           |
-|                   | [K-Truss](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/ktruss.ipynb)                                | Single-GPU    |  MG planned for 23.10  |
+|                   | [K-Truss](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/community/ktruss.ipynb)                                | Single-GPU    |  MG planned for 2024  |
 | Components        |          |                |          |
 |                   | [Weakly Connected Components](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/components/ConnectedComponents.ipynb)        | __Multi-GPU__ |        |
 |                   | [Strongly Connected Components](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/components/ConnectedComponents.ipynb)      | Single-GPU    |        |
@@ -55,7 +55,7 @@ Note: Multi-GPU, or MG, includes support for Multi-Node Multi-GPU (also called M
 |                   | [Pagerank](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_analysis/Pagerank.ipynb)                | __Multi-GPU__ | [C++ README](cpp/src/centrality/README.md#Pagerank)                |
 |                   | [Personal Pagerank]()                  | __Multi-GPU__ | [C++ README](cpp/src/centrality/README.md#Personalized-Pagerank)   |
 |                   | [HITS](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_analysis/HITS.ipynb)        | __Multi-GPU__ |                |
-| [Link Prediction](./algorithms/Similarity.md)   |                                    |                     |                                                                 |
+| [Link Prediction](algorithms/Similarity.html)   |                                    |                     |                                                                 |
 |                   | [Jaccard Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb)                 | __Multi-GPU__      | Directed graph only                         |
 |                   | [Weighted Jaccard Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb)        | Single-GPU          |                                                                 |
 |                   | [Overlap Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb)                 | **Multi-GPU** |                                                   |
@@ -65,8 +65,8 @@ Note: Multi-GPU, or MG, includes support for Multi-Node Multi-GPU (also called M
 |                   | [Uniform Random Walks RW](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/sampling/RandomWalk.ipynb)          | __Multi-GPU__ |                                                                 |
 |                   | *Biased Random Walks (RW)*       | ---                 |                                                                 |
 |                   | Egonet                             | __Multi-GPU__ |                                                                 |
-|                   | Node2Vec                           | Single-GPU          |                                             |
-|                   | Uniform Neighborhood sampling      | __Multi-GPU__ |                                                                 |
+|                   | Node2Vec                           | __Multi-GPU__       |                                             |
+|                   | Neighborhood sampling      | __Multi-GPU__ |                                                                 |
 | Traversal         |                                    |                     |                                                                 |
 |                   | Breadth First Search (BFS)         | __Multi-GPU__ | with cutoff support [C++ README](cpp/src/traversal/README.md#BFS) |
 |                   | Single Source Shortest Path (SSSP) | __Multi-GPU__ | [C++ README](cpp/src/traversal/README.md#SSSP)                     |
diff --git a/docs/cugraph/source/graph_support/algorithms/Centrality.md b/docs/cugraph/source/graph_support/algorithms/Centrality.md
index fdd446a4366..8119e655236 100644
--- a/docs/cugraph/source/graph_support/algorithms/Centrality.md
+++ b/docs/cugraph/source/graph_support/algorithms/Centrality.md
@@ -1,7 +1,7 @@
 
 # cuGraph Centrality Notebooks
 
-<img src="../../images/zachary_graph_centrality.png" width="35%"/>
+<img src="../images/zachary_graph_centrality.png" width="35%"/>
 
 The RAPIDS cuGraph Centrality folder contains a collection of Jupyter Notebooks that demonstrate algorithms to identify and quantify the importance of vertices to the structure of the graph.  In the diagram above, the highlighted vertices are highly important and are likely answers to questions like:
 
@@ -23,6 +23,8 @@ But which vertices are most important? The answer depends on which measure/algor
 
 [System Requirements](https://github.com/rapidsai/cugraph/blob/main/notebooks/README.md#requirements)
 
+
+
 | Author Credit |    Date    |  Update          | cuGraph Version |  Test Hardware |
 | --------------|------------|------------------|-----------------|----------------|
 | Brad Rees     | 04/19/2021 | created          | 0.19            | GV100, CUDA 11.0
diff --git a/docs/cugraph/source/graph_support/algorithms/Similarity.md b/docs/cugraph/source/graph_support/algorithms/Similarity.md
index 450beb373a2..18c0a94d519 100644
--- a/docs/cugraph/source/graph_support/algorithms/Similarity.md
+++ b/docs/cugraph/source/graph_support/algorithms/Similarity.md
@@ -15,9 +15,9 @@ Manipulation of the data before or after the graph analytic is not covered here.
 
 |Algorithm          |Notebooks Containing                                                     |Description                                                  |
 | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
-|[Jaccard Smiliarity](./jaccard_similarity.md)| [Jaccard Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb)                 ||
-|[Overlap Similarity](./overlap_similarity.md)| [Overlap Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb)                    ||
-|[Sorensen](./sorensen_coefficient.md)|[Sorensen Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Sorensen_coefficient.ipynb)||
+|[Jaccard Similarity](./jaccard_similarity.html)| [Jaccard Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb)                 ||
+|[Overlap Similarity](./overlap_similarity.html)| [Overlap Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb)                    ||
+|[Sorensen](./sorensen_coefficient.html)|[Sorensen Similarity](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_prediction/Sorensen_coefficient.ipynb)||
 |Personal Pagerank|[Pagerank](https://github.com/rapidsai/cugraph/blob/main/notebooks/algorithms/link_analysis/Pagerank.ipynb)                 ||
 
 
diff --git a/docs/cugraph/source/index.rst b/docs/cugraph/source/index.rst
index c5303c21674..955eb6d54db 100644
--- a/docs/cugraph/source/index.rst
+++ b/docs/cugraph/source/index.rst
@@ -25,12 +25,12 @@ RAPIDS Graph documentation
    * - :abbr:`libcugraph_etl (C++ renumbering function for strings)`
      - :abbr:`wholegraph (Shared memory-based GPU-accelerated GNN training)`
      -
-
 ..
-
-|
 |
 
+~~~~~~~~~~~~
+Introduction
+~~~~~~~~~~~~
 cuGraph is a library of graph algorithms that seamlessly integrates into the
 RAPIDS data science ecosystem and allows the data scientist to easily call
 graph algorithms using data stored in GPU DataFrames, NetworkX Graphs, or 
@@ -39,6 +39,7 @@ even CuPy or SciPy sparse Matrices.
 Note: We are redoing all of our documents, please be patient as we update
 the docs and links
 
+|
 
 .. toctree::
    :maxdepth: 2
@@ -48,9 +49,8 @@ the docs and links
    installation/index
    tutorials/index
    graph_support/index
+   wholegraph/index
    references/index
-   dev_resources/index
-   releases/index
    api_docs/index
 
 Indices and tables
diff --git a/docs/cugraph/source/wholegraph/basics/index.rst b/docs/cugraph/source/wholegraph/basics/index.rst
new file mode 100644
index 00000000000..429fe35d601
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/basics/index.rst
@@ -0,0 +1,11 @@
+======
+Basics
+======
+
+
+.. toctree::
+   :maxdepth: 2
+
+   wholegraph_intro
+   wholememory_intro
+   wholememory_implementation_details
diff --git a/docs/cugraph/source/wholegraph/basics/wholegraph_intro.md b/docs/cugraph/source/wholegraph/basics/wholegraph_intro.md
new file mode 100644
index 00000000000..360f8e0e36b
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/basics/wholegraph_intro.md
@@ -0,0 +1,135 @@
+# WholeGraph Introduction
+WholeGraph helps train large-scale Graph Neural Networks (GNN).
+WholeGraph provides an underlying storage structure called WholeMemory.
+WholeMemory is a Tensor-like storage and provides multi-GPU support.
+It is optimized for NVLink systems like DGX A100 servers.
+By working together with cuGraph, cuGraph-Ops, cuGraph-DGL, cuGraph-PyG, and upstream DGL and PyG,
+it will be easy to build GNN applications.
+
+## WholeMemory
+WholeMemory can be regarded as a whole view of GPU memory.
+WholeMemory exposes a handle of the memory instance no matter how the underlying data is stored across multiple GPUs.
+WholeMemory assumes that a separate process is used to control each GPU.
+
+### WholeMemory Basics
+To define WholeMemory, we need to specify the following:
+
+#### 1. Specify the set of GPU to handle the Memory
+
+Since WholeMemory is owned by a set of GPUs, you must specify the set of GPUs.
+This is done by creating [WholeMemory Communicator](#wholememory-communicator) and specifying the WholeMemory Communicator when creating WholeMemory.
+
+#### 2. Specify the location of the memory
+
+Although WholeMemory is owned by a set of GPUs, the memory itself can be located in host memory or in device memory.
+The location of the memory needs to be specified; two types of location can be specified.
+
+- **Host memory**: will use pinned host memory as underlying storage.
+- **Device memory**: will use GPU device memory as underlying storage.
+
+#### 3. Specify the address mapping mode of the memory
+
+As WholeMemory is owned by multiple GPUs, each GPU will access the whole memory space, so we need address mapping.
+There are three types of address mapping modes (also known as WholeMemory types), they are:
+
+- **Continuous**: All memory from each GPU will be mapped into a single continuous memory address space for each GPU.
+  In this mode, each GPU can directly access the whole memory using a single pointer and offset, just like using normal
+  device memory. Software will see no difference. Hardware peer to peer access will handle the underlying communication.
+
+- **Chunked**: Memory from each GPU will be mapped into different memory chunks, one chunk for each GPU.
+  In this mode, direct access is also supported, but not using a single pointer. Software will see the chunked memory.
+  However, an abstract layer may help to hide this.
+
+- **Distributed**: Memory from other GPUs is not mapped into the current GPU, so no direct access is supported.
+  To access the memory of another GPU, explicit communication is needed.
+
+To learn more details about WholeMemory locations and WholeMemory types, please refer to
+[WholeMemory Implementation Details](wholememory_implementation_details.md)
+
+### WholeMemory Communicator
+WholeMemory Communicator has two main purposes:
+
+- **Defines a set of GPUs which work together on WholeMemory.** WholeMemory Communicator is created by all GPUs that
+  want to work together. A WholeMemory Communicator can be reused as long as the GPU set needed is the same.
+- **Provides the underlying communication channel needed by WholeMemory.** WholeMemory may need communication between GPUs
+  during WholeMemory creation and for some operations on some types of WholeMemory.
+
+To create a WholeMemory Communicator, a WholeMemory Unique ID needs to be created first. It is usually created by the first GPU in the set of GPUs, and then broadcast to all GPUs that want to work together. Then all GPUs in this communicator
+will call the WholeMemory Communicator creation function using this WholeMemory Unique ID, the rank of the current GPU, and
+the total GPU count.
+
+### WholeMemory Granularity
+Since the underlying storage may be physically partitioned across multiple GPUs, a single user data block could
+end up split between GPUs, which is usually not wanted. To prevent this, the granularity of the data can be specified
+when creating WholeMemory. The WholeMemory is then treated as multiple blocks of that granularity, and no block is split.
+
+### WholeMemory Mapping
+As WholeMemory provides a whole view of memory to GPU, to access WholeMemory, mapping is usually needed.
+Different types of WholeMemory support different mapping methods, as their names suggest.
+Some of the supported mappings include:
+- All the WholeMemory types support mapping the memory range that local GPU is responsible for.
+  That is, each rank can directly access "Local" memory in all types of WholeMemory.
+  Here "Local" memory doesn't have to be on current GPU's memory, it can be on host memory or even maybe on other GPU,
+  but it is guaranteed to be directly accessed by current GPU.
+- Chunked and Continuous WholeMemory also support Chunked mapping. That is, memory of all GPUs can be mapped into
+  current GPU, one continuous chunk for one GPU. Each chunk can be directly accessed by current GPU. But the memory of
+  different chunks are not guaranteed to be continuous.
+- Continuous WholeMemory can be mapped into continuous memory space. That is, memory of all GPUs are mapped into a
+  single range of virtual memory, accessing to different position of this memory will physically access to different
+  GPUs. This mapping will be handled by hardware (CPU pagetable or GPU pagetable).
+
+### Operations on WholeMemory
+There are some operations that can be performed on WholeMemory. They are based on the mapping of WholeMemory.
+#### Local Operation
+Since all WholeMemory types support mapping of local memory, operations on local memory are supported. The operation can be
+either read or write. Simply use it as GPU memory of the current device.
+#### Load and Store
+To facilitate file operation, Load / Store WholeMemory from file or to file is supported. WholeMemory uses raw binary
+file format for disk operation. For Load, the input file can be a single file or a list of files, if it is a list, they
+will be logically concatenated together and then loaded. For store, each GPU stores its local memory to file, producing
+a list of files.
+#### Gather and Scatter
+WholeMemory also supports Gather / Scatter operation, usually they operate on a
+[WholeMemory Tensor](#wholememory-tensor).
+
+### WholeMemory Tensor
+Compared to PyTorch, WholeMemory is like PyTorch Storage while a WholeMemory Tensor is like a PyTorch Tensor.
+For now, WholeMemory supports only 1D and 2D tensors, or arrays and matrices. Only first dimension is partitioned.
+
+### WholeMemory Embedding
+WholeMemory Embedding is just like a 2D WholeMemory Tensor, with two features added. They support cache and sparse
+optimizers.
+#### Cache Support
+To create WholeMemory Embedding with a cache, WholeMemory CachePolicy needs to be created first. WholeMemoryCachePolicy can be created with the following fields:
+- **WholeMemory Communicator**: WholeMemory CachePolicy also needs WholeMemory Communicator.
+  WholeMemory Communicator defines the set of GPUs that cache all the Embedding.
+  It can be the same as the WholeMemory Communicator used to create WholeMemory Embedding.
+- **WholeMemory type**: WholeMemory CachePolicy uses WholeMemory type to specify the WholeMemory type of cache.
+- **WholeMemory location**: WholeMemory CachePolicy uses WholeMemory location to specify the location of the cache.
+- **Access type**: Access type can be readonly or readwrite.
+- **Cache ratio**: Specify how much memory the cache will use. This ratio is computed for each GPU set that caches the
+  whole embedding.
+
+The two most commonly used caches are:
+- **Device cached host memory**: When the WholeMemory Communicator for Cache Policy is the same as the WholeMemory
+  Communicator used to create WholeMemory Embedding, it means that the cache has same GPU set as WholeMemory Embedding.
+  So each GPU just caches its own part of raw Embedding.
+  Most commonly, when raw WholeMemory Embedding is located on host memory, and the cache is on device
+  memory, each GPU just caches its own part of host memory.
+- **Local cached global memory**: The WholeMemory Communicator of WholeMemory CachePolicy can also be a subset of the
+  WholeMemory Communicator of WholeMemory Embedding. In this case, the subset of GPUs together cache all the embeddings.
+  Normally, when raw WholeMemory Embedding is partitioned on different machine nodes, and we
+  want to cache some embeddings in local machine or local GPU, then the subset of GPU can be all the GPUs in the local
+  machine. For local cached global memory, only readonly is supported.
+
+#### WholeMemory Embedding Sparse Optimizer
+Another feature of WholeMemory Embedding is that WholeMemory Embedding supports embedding training.
+To efficiently train large embedding tables, a sparse optimizer is needed.
+WholeMemory Embedding Sparse Optimizer can run on a cached or noncached WholeMemory Embedding.
+Currently supported optimizers include SGD, Adam, RMSProp and AdaGrad.
+
+## Graph Structure
+Graph structure in WholeGraph is also based on WholeMemory.
+In WholeGraph, graph is stored in [CSR format](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)).
+Both ROW_INDEX (denoted as `csr_row_ptr`) and COL_INDEX (denoted as `csr_col_ind`) are stored in a
+WholeMemory Tensor. So loading Graph Structure can use [WholeMemory Tensor Loading mechanism](#load-and-store).
diff --git a/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md b/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md
new file mode 100644
index 00000000000..a5541109c4f
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md
@@ -0,0 +1,58 @@
+# WholeMemory Implementation Details
+As described in [WholeMemory Introduction](wholegraph_intro.md), there are two WholeMemory locations and three
+WholeMemory types, so there are six kinds of WholeMemory in total.
+
+|     Type      | CONTINUOUS  | CONTINUOUS |  CHUNKED  |  CHUNKED  | DISTRIBUTED | DISTRIBUTED |
+|:-------------:|:-----------:|:----------:|:---------:|:---------:|:-----------:|:-----------:|
+|   Location    |   DEVICE    |    HOST    |  DEVICE   |   HOST    |   DEVICE    |    HOST     |
+| Allocated by  |    EACH     |   FIRST    |   EACH    |   FIRST   |    EACH     |    EACH     |
+| Allocate API  |   Driver    |    Host    |  Runtime  |   Host    |   Runtime   |   Runtime   |
+|  IPC Mapping  |   Unix fd   |    mmap    |  cudaIpc  |   mmap    | No IPC map  | No IPC map  |
+
+For "Continuous" and "Chunked" types of WholeMemory, all memory is mapped to each GPU,
+so these two types are both "Mapped" WholeMemory, in contrast to "Distributed" WholeMemory, where memory from other GPUs is not mapped.
+
+## WholeMemory Layout
+Since the underlying memory of a single WholeMemory object may be on multiple GPU devices, the WholeGraph library will
+partition data into these GPU devices.
+The partition method guarantees that each GPU can access one continuous part of the entire memory.
+Here "can access" means can directly access from CUDA kernels, but the memory doesn't have to be physically on that GPU.
+For example, it can be on host memory or on another GPU's device memory that can be accessed using P2P.
+If the stored data has its own granularity that shouldn't be split, the data granularity can be specified while
+creating WholeMemory. Then each data granularity can be considered as a block of data.
+
+The following figure shows the layout of 15 data blocks over 4 GPUs.
+![WholeMemory Layout](../imgs/general_wholememory.png)
+
+WholeMemory Tensors can be 1D or 2D tensors.
+For a 1D tensor, the data granularity is one element. For a 2D tensor, the data granularity is one of its 1D tensors (rows).
+The layout will be like this:
+![WholeMemory Tensor Layout](../imgs/wholememory_tensor.png)
+
+## WholeMemory Allocation
+As there are six types of WholeMemory, the allocation process of each type is as follows:
+
+### Device Continuous WholeMemory
+For Device Continuous WholeMemory, first a range of virtual address space is reserved in each GPU, which covers the
+entire memory range. Then a part of physical memory is allocated in each GPU, as shown in the following figure.
+![Device Continuous WholeMemory Allocation Step 1](../imgs/device_continuous_wholememory_step1.png)
+After that, each GPU gathers all the memory handles from all GPUs, and maps them to the reserved address space.
+![Device Continuous WholeMemory Allocation Step 2](../imgs/device_continuous_wholememory_step2.png)
+
+### Device Chunked WholeMemory
+For Device Chunked WholeMemory, first each GPU allocates its own part of memory using CUDA runtime API, this will create
+both a virtual address space and physical memory for its own memory.
+![Device Chunked WholeMemory Allocation Step 1](../imgs/device_chunked_wholememory_step1.png)
+Each GPU gathers the IPC handle of memory from all other GPUs, and maps that into its own virtual address space.
+![Device Chunked WholeMemory Allocation Step 2](../imgs/device_chunked_wholememory_step2.png)
+
+### Host Mapped WholeMemory
+For Host, Continuous and Chunked use the same method. First, the first rank allocates the host physical memory and shares it with all
+ranks.
+![Host Mapped WholeMemory Allocation Step 1](../imgs/host_mapped_wholememory_step1.png)
+Then each rank registers that host memory to GPU address space.
+![Host Mapped WholeMemory Allocation Step 2](../imgs/host_mapped_wholememory_step2.png)
+
+### Distributed WholeMemory
+For Distributed WholeMemory, each GPU just allocates its own part of memory; there is no need to share it with other GPUs.
+![Distributed WholeMemory Allocation](../imgs/distributed_wholememory.png)
diff --git a/docs/cugraph/source/wholegraph/basics/wholememory_intro.md b/docs/cugraph/source/wholegraph/basics/wholememory_intro.md
new file mode 100644
index 00000000000..7209da9471c
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/basics/wholememory_intro.md
@@ -0,0 +1,123 @@
+## WholeMemory
+WholeMemory can be regarded as a whole view of GPU memory.
+WholeMemory exposes a handle to the memory instance no matter how the underlying data is stored across multiple GPUs.
+WholeMemory assumes that a separate process is used to control each GPU.
+
+### WholeMemory Basics
+To define WholeMemory, we need to specify the following:
+
+#### 1. Specify the set of GPU to handle the Memory
+
+As WholeMemory is owned by a set of GPUs, the set of GPUs needs to be specified.
+This is done by creating a [WholeMemory Communicator](#wholememory-communicator) and specifying that WholeMemory Communicator
+when creating WholeMemory.
+
+#### 2. Specify the location of the memory
+
+Although WholeMemory is owned by a set of GPUs, the memory itself can be located on host memory or on device memory.
+So the location of the memory needs to be specified. Two types of location can be specified.
+
+- **Host memory**: will use pinned host memory as underlying storage.
+- **Device memory**: will use GPU device memory as underlying storage.
+
+#### 3. Specify the address mapping mode of the memory
+
+As WholeMemory is owned by multiple GPUs, each GPU will access the whole memory space, so we need address mapping.
+There are three types of address mapping modes (also known as WholeMemory types), they are:
+
+- **Continuous**: All memory from each GPU will be mapped into a single continuous memory address space for each GPU.
+  In this mode, each GPU can directly access the whole memory using a single pointer and offset, just like using normal
+  device memory. Software will see no difference. Hardware peer-to-peer access will handle the underlying communication.
+
+- **Chunked**: Memory from each GPU will be mapped into different memory chunks, one chunk for each GPU.
+  In this mode, direct access is also supported, but not using a single pointer. Software will see the chunked memory.
+  However, an abstract layer can hide this.
+
+- **Distributed**: Memory from other GPUs is not mapped into current GPU, so no direct access is supported.
+  To access memory of another GPU, explicit communication is needed.
+
+If you would like to know more details about WholeMemory locations and WholeMemory types, please refer to
+[WholeMemory Implementation Details](wholememory_implementation_details.md)
+
+### WholeMemory Communicator
+WholeMemory Communicator has two main purposes:
+
+- **Defines a set of GPUs which work together on WholeMemory.** WholeMemory Communicator is created by all GPUs that
+  want to work together. A WholeMemory Communicator can be reused as long as the GPU set needed is the same.
+- **Provides underlying communication channel needed by WholeMemory.** WholeMemory may need communication between GPUs
+  during the WholeMemory creation and some OPs on some types of WholeMemory.
+
+To create a WholeMemory Communicator, a WholeMemory Unique ID needs to be created first. It is usually created by the first
+GPU in the set of GPUs, and then broadcast to all GPUs that want to work together. Then all GPUs in this communicator
+will call the WholeMemory Communicator creation function using this WholeMemory Unique ID, the rank of the current GPU, and
+the total GPU count.
+
+### WholeMemory Granularity
+As the underlying storage may be physically partitioned across multiple GPUs, it is usually not desirable for a partition
+boundary to fall inside a single user data block. To help with this, the granularity of data can be specified when
+creating WholeMemory. WholeMemory is then treated as multiple blocks of that granularity, and no block is split across GPUs.
+
+### WholeMemory Mapping
+Since WholeMemory provides a whole view of memory to GPU, mapping is usually needed to access WholeMemory.
+Different types of WholeMemory support different mapping methods, as their names suggest.
+Some mappings supported include:
+- All the WholeMemory types support mapping the memory range that local GPU is responsible for.
+  That is, each rank can directly access "Local" memory in all types of WholeMemory.
+  Here "Local" memory doesn't have to be on current GPU's memory, it can be on host memory or even maybe on other GPU,
+  but it is guaranteed to be directly accessed by current GPU.
+- Chunked and Continuous WholeMemory also support Chunked mapping. That is, memory of all GPUs can be mapped into
+  current GPU, one continuous chunk for one GPU. Each chunk can be directly accessed by current GPU. But the memory of
+  different chunks is not guaranteed to be continuous.
+- Continuous WholeMemory can be mapped into continuous memory space. That is, memory of all GPUs is mapped into a
+  single range of virtual memory, accessing different positions of this memory will physically access different
+  GPUs. This mapping will be handled by hardware (CPU pagetable or GPU pagetable).
+
+### Operations on WholeMemory
+There are some operations that can be performed on WholeMemory. They are based on the mapping of WholeMemory.
+#### Local Operation
+As all WholeMemory types support mapping of local memory, operations on local memory are supported. The operation can be
+either read or write. The local memory can simply be used as GPU memory of the current device.
+#### Load / Store
+To facilitate file operation, Load / Store WholeMemory from file or to file is supported. WholeMemory uses raw binary
+file format for disk operation. For Load, the input file can be a single file or a list of files. If it is a list, they
+will be logically concatenated together and then loaded. For store, each GPU stores its local memory to file, producing
+a list of files.
+#### Gather / Scatter
+WholeMemory also supports Gather / Scatter operations, usually they operate on a
+[WholeMemory Tensor](#wholememory-tensor).
+
+### WholeMemory Tensor
+Compared to PyTorch, WholeMemory is like PyTorch Storage while WholeMemory Tensor is like PyTorch Tensor.
+For now, WholeMemory supports only 1D and 2D tensor, or array and matrix. Only first dimension is partitioned.
+
+### WholeMemory Embedding
+WholeMemory Embedding is just like 2D WholeMemory Tensor, with cache support and sparse optimizer support added.
+#### Cache Support
+WholeMemory Embedding supports cache. To create WholeMemory Embedding with cache, a WholeMemory CachePolicy needs to be
+created first. WholeMemoryCachePolicy can be created with the following fields:
+- **WholeMemory Communicator**: WholeMemory CachePolicy also needs a WholeMemory Communicator.
+  This WholeMemory Communicator defines the set of GPUs that cache all the embeddings.
+  It can be the same as the WholeMemory Communicator used to create WholeMemory Embedding.
+- **WholeMemory type**: WholeMemory CachePolicy uses WholeMemory type to specify the WholeMemory type of the cache.
+- **WholeMemory location**: WholeMemory CachePolicy uses WholeMemory location to specify the location of the cache.
+- **Access type**: Access type can be readonly or readwrite.
+- **Cache ratio**: Specify how much memory the cache will use. This ratio is computed for each GPU set that caches the
+  whole embedding.
+
+There are two most commonly used caches. They are:
+- **Device cached host memory**: When the WholeMemory Communicator for Cache Policy is the same as the WholeMemory
+  Communicator used to create WholeMemory Embedding, it means that cache has the same GPU set as WholeMemory Embedding.
+  So each GPU just caches its own part of raw Embedding.
+  Normally, when raw WholeMemory Embedding is located on host memory, and the cache is on device
+  memory, each GPU just caches its own part of host memory.
+- **Local cached global memory**: The WholeMemory Communicator of WholeMemory CachePolicy can also be a subset of the
+  WholeMemory Communicator of WholeMemory Embedding. In this case, the subset of GPUs together cache all the embeddings.
+  Typically, raw WholeMemory Embedding is partitioned on different machine nodes, and we
+  want to cache some embeddings in local machine or local GPU, then the subset of GPUs can be all the GPUs on the local
+  machine. Local cached global memory supports only readonly access.
+
+#### WholeMemory Embedding Sparse Optimizer
+Another feature of WholeMemory Embedding is that WholeMemory Embedding supports embedding training.
+To efficiently train large embedding tables, a sparse optimizer is needed.
+The WholeMemory Embedding Sparse Optimizer can run on cached or non-cached WholeMemory Embedding.
+Currently supported optimizers include SGD, Adam, RMSProp and AdaGrad.
diff --git a/docs/cugraph/source/wholegraph/imgs/device_chunked_wholememory_step1.png b/docs/cugraph/source/wholegraph/imgs/device_chunked_wholememory_step1.png
new file mode 100644
index 00000000000..b8a0447e6fb
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/device_chunked_wholememory_step1.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/device_chunked_wholememory_step2.png b/docs/cugraph/source/wholegraph/imgs/device_chunked_wholememory_step2.png
new file mode 100644
index 00000000000..8b203ce2246
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/device_chunked_wholememory_step2.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/device_continuous_wholememory_step1.png b/docs/cugraph/source/wholegraph/imgs/device_continuous_wholememory_step1.png
new file mode 100644
index 00000000000..46ecd1f14e7
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/device_continuous_wholememory_step1.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/device_continuous_wholememory_step2.png b/docs/cugraph/source/wholegraph/imgs/device_continuous_wholememory_step2.png
new file mode 100644
index 00000000000..b773b1ef6e9
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/device_continuous_wholememory_step2.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/distributed_wholememory.png b/docs/cugraph/source/wholegraph/imgs/distributed_wholememory.png
new file mode 100644
index 00000000000..e6bbe9f13e9
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/distributed_wholememory.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/general_wholememory.png b/docs/cugraph/source/wholegraph/imgs/general_wholememory.png
new file mode 100644
index 00000000000..3ece02b007b
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/general_wholememory.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/host_mapped_wholememory_step1.png b/docs/cugraph/source/wholegraph/imgs/host_mapped_wholememory_step1.png
new file mode 100644
index 00000000000..aad8caf0d07
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/host_mapped_wholememory_step1.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/host_mapped_wholememory_step2.png b/docs/cugraph/source/wholegraph/imgs/host_mapped_wholememory_step2.png
new file mode 100644
index 00000000000..20597f3e515
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/host_mapped_wholememory_step2.png differ
diff --git a/docs/cugraph/source/wholegraph/imgs/wholememory_tensor.png b/docs/cugraph/source/wholegraph/imgs/wholememory_tensor.png
new file mode 100644
index 00000000000..e725d6c28ed
Binary files /dev/null and b/docs/cugraph/source/wholegraph/imgs/wholememory_tensor.png differ
diff --git a/docs/cugraph/source/wholegraph/index.rst b/docs/cugraph/source/wholegraph/index.rst
new file mode 100644
index 00000000000..2a69544b4c9
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/index.rst
@@ -0,0 +1,14 @@
+WholeGraph
+==========
+RAPIDS WholeGraph has the following package:
+
+* pylibwholegraph: shared memory-based GPU-accelerated GNN training
+
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   basics/index
+   installation/index
+
diff --git a/docs/cugraph/source/wholegraph/installation/container.md b/docs/cugraph/source/wholegraph/installation/container.md
new file mode 100644
index 00000000000..3a2c627c56a
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/installation/container.md
@@ -0,0 +1,29 @@
+# Build Container for WholeGraph
+To run WholeGraph or build WholeGraph from source, set up the environment first.
+We recommend using Docker images.
+For example, to build the WholeGraph base image from the NGC pytorch 22.10 image, you can use the following `Dockerfile`:
+```dockerfile
+FROM nvcr.io/nvidia/pytorch:22.10-py3
+
+RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y lsb-core software-properties-common wget libspdlog-dev
+
+#RUN remove old cmake to update
+RUN conda remove --force -y cmake
+RUN rm -rf /usr/local/bin/cmake && rm -rf /usr/local/lib/cmake && rm -rf /usr/lib/cmake
+
+RUN apt-key adv --fetch-keys https://apt.kitware.com/keys/kitware-archive-latest.asc && \
+    export LSB_CODENAME=$(lsb_release -cs) && \
+    apt-add-repository -y "deb https://apt.kitware.com/ubuntu/ ${LSB_CODENAME} main" && \
+    apt update && apt install -y cmake
+
+# update py for pytest
+RUN pip3 install -U py
+RUN pip3 install Cython setuputils3 scikit-build nanobind pytest-forked pytest
+```
+
+To run GNN applications, you may also need cuGraphOps, DGL and/or PyG libraries to run the GNN layers.
+You may refer to [DGL](https://www.dgl.ai/pages/start.html) or [PyG](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) for installation instructions.
+For example, to install DGL, you may need to add:
+```dockerfile
+RUN pip3 install  dgl -f https://data.dgl.ai/wheels/cu118/repo.html
+```
diff --git a/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md b/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md
new file mode 100644
index 00000000000..5b2072b0523
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md
@@ -0,0 +1,48 @@
+
+# Getting the WholeGraph Packages
+
+Start by reading the [RAPIDS Installation guide](https://docs.rapids.ai/install)  
+and check out the [RAPIDS install selector](https://rapids.ai/start.html) for a pick list of install options.
+
+
+There are 4 ways to get WholeGraph packages:
+1. [Quick start with Docker Repo](#docker)
+2. [Conda Installation](#conda)
+3. [Pip Installation](#pip)
+4. [Build from Source](./source_build.md)
+
+
+<br>
+
+## Docker
+The RAPIDS Docker containers (as of Release 23.10) contain all RAPIDS packages, including WholeGraph, as well as all required supporting packages.   To download a container, please see the [Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running.  This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries.
+
+<br>
+
+
+## Conda
+It is easy to install WholeGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download).
+
+WholeGraph conda packages
+ * libwholegraph
+ * pylibwholegraph
+
+Replace the package name in the example below with the one you want to install.
+
+
+Install and update WholeGraph using the conda command:
+
+```bash
+conda install -c rapidsai -c conda-forge -c nvidia wholegraph cudatoolkit=11.8
+```
+
+<br>
+
+## PIP
+wholegraph, and all of RAPIDS, is available via pip.
+
+```
+pip install wholegraph-cu11 --extra-index-url=https://pypi.nvidia.com
+```
+
+<br>
diff --git a/docs/cugraph/source/wholegraph/installation/index.rst b/docs/cugraph/source/wholegraph/installation/index.rst
new file mode 100644
index 00000000000..09f1cb44a24
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/installation/index.rst
@@ -0,0 +1,9 @@
+Installation
+============
+
+.. toctree::
+    :maxdepth: 2
+
+    getting_wholegraph
+    container
+    source_build
diff --git a/docs/cugraph/source/wholegraph/installation/source_build.md b/docs/cugraph/source/wholegraph/installation/source_build.md
new file mode 100644
index 00000000000..c468048c351
--- /dev/null
+++ b/docs/cugraph/source/wholegraph/installation/source_build.md
@@ -0,0 +1,187 @@
+# Building from Source
+
+The following instructions are for users wishing to build wholegraph from source code. These instructions are tested on supported distributions of Linux, CUDA,
+and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for a list of supported environments.
+Other operating systems _might be_ compatible, but are not currently tested.
+
+The wholegraph package includes both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for WholeGraph to operate correctly.
+The C/C++ CUDA library is `libwholegraph` and the python library is `pylibwholegraph`.
+
+## Prerequisites
+
+__Compiler__:
+* `gcc`         version 11.0+
+* `nvcc`        version 11.0+
+* `cmake`       version 3.26.4+
+
+__CUDA__:
+* CUDA 11.8+
+* NVIDIA driver 450.80.02+
+* Pascal architecture or better
+
+You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads).
+
+__Other Packages__:
+* ninja
+* nccl
+* cython
+* setuputils3
+* scikit-learn
+* scikit-build
+* nanobind>=0.2.0
+
+## Building wholegraph
+To install wholegraph from source, ensure the dependencies are met.
+
+### Clone Repo and Configure Conda Environment
+__GIT clone a version of the repository__
+
+  ```bash
+  # Set the location to wholegraph in an environment variable WHOLEGRAPH_HOME
+  export WHOLEGRAPH_HOME=$(pwd)/wholegraph
+
+  # Download the wholegraph repo - if you have a forked version, use that path here instead
+  git clone https://github.com/rapidsai/wholegraph.git $WHOLEGRAPH_HOME
+
+  cd $WHOLEGRAPH_HOME
+  ```
+
+__Create the conda development environment__
+
+```bash
+# create the conda environment (assuming in base `wholegraph` directory)
+
+# for CUDA 11.x
+conda env create --name wholegraph_dev --file conda/environments/all_cuda-118_arch-x86_64.yaml
+
+# activate the environment
+conda activate wholegraph_dev
+
+# to deactivate an environment
+conda deactivate
+```
+
+  - The environment can be updated as development includes/changes the dependencies. To do so, run:
+
+
+```bash
+
+# Where XXX is the CUDA version
+conda env update --name wholegraph_dev --file conda/environments/all_cuda-XXX_arch-x86_64.yaml
+
+conda activate wholegraph_dev
+```
+
+
+### Build and Install Using the `build.sh` Script
+Using the `build.sh` script makes compiling and installing wholegraph a
+breeze. To build and install, simply do:
+
+```bash
+$ cd $WHOLEGRAPH_HOME
+$ ./build.sh clean
+$ ./build.sh libwholegraph
+$ ./build.sh pylibwholegraph
+```
+
+There are several other options available on the build script for advanced users.
+`build.sh` options:
+```bash
+build.sh [<target> ...] [<flag> ...]
+ where <target> is:
+   clean                    - remove all existing build artifacts and configuration (start over).
+   uninstall                - uninstall libwholegraph and pylibwholegraph from a prior build/install (see also -n)
+   libwholegraph            - build the libwholegraph C++ library.
+   pylibwholegraph          - build the pylibwholegraph Python package.
+   tests                    - build the C++ (OPG) tests.
+   benchmarks               - build benchmarks.
+   docs                     - build the docs
+ and <flag> is:
+   -v                          - verbose build mode
+   -g                          - build for debug
+   -n                          - no install step
+   --allgpuarch               - build for all supported GPU architectures
+   --cmake-args=\\\"<args>\\\" - add arbitrary CMake arguments to any cmake call
+   --compile-cmd               - only output compile commands (invoke CMake without build)
+   --clean                    - clean an individual target (note: to do a complete rebuild, use the clean target described above)
+   -h | --h[elp]               - print this text
+
+ default action (no args) is to build and install 'libwholegraph' then 'pylibwholegraph' targets
+
+examples:
+$ ./build.sh clean                        # remove prior build artifacts (start over)
+$ ./build.sh
+
+# make parallelism options can also be defined: Example build jobs using 4 threads (make -j4)
+$ PARALLEL_LEVEL=4 ./build.sh libwholegraph
+
+Note that the libraries will be installed to the location set in `$PREFIX` if set (i.e. `export PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`.
+```
+
+
+## Building each section independently
+### Build and Install the C++/CUDA `libwholegraph` Library
+CMake depends on the `nvcc` executable being on your path or defined in `$CUDACXX`.
+
+This project uses cmake for building the C/C++ library. To configure cmake, run:
+
+  ```bash
+  # Set the location to wholegraph in an environment variable WHOLEGRAPH_HOME
+  export WHOLEGRAPH_HOME=$(pwd)/wholegraph
+
+  cd $WHOLEGRAPH_HOME
+  cd cpp                                        # enter cpp directory
+  mkdir build                                   # create build directory
+  cd build                                      # enter the build directory
+  cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX
+
+  # now build the code
+  make -j                                       # "-j" starts multiple threads
+  make install                                  # install the libraries
+  ```
+The default installation locations are `$CMAKE_INSTALL_PREFIX/lib` and `$CMAKE_INSTALL_PREFIX/include/wholegraph` respectively.
+
+### Building and installing the Python package
+
+Build and Install the Python packages to your Python path:
+
+```bash
+cd $WHOLEGRAPH_HOME
+cd python
+cd pylibwholegraph
+python setup.py build_ext --inplace
+python setup.py install    # install pylibwholegraph
+```
+
+## Run tests
+
+Run either the C++ or the Python tests with datasets
+
+  - **Python tests with datasets**
+
+    ```bash
+    cd $WHOLEGRAPH_HOME
+    cd python
+    pytest
+    ```
+
+  - **C++ stand alone tests**
+
+    From the build directory :
+
+    ```bash
+    # Run the tests
+    cd $WHOLEGRAPH_HOME
+    cd cpp/build
+    gtests/PARALLEL_UTILS_TESTS		# this is an executable file
+    ```
+
+
+Note: This conda installation only applies to Linux and Python versions 3.8/3.10.
+
+## Creating documentation
+
+Python API documentation can be generated from the _./docs/wholegraph_ directory, or by running `./build.sh docs`.
+
+## Attribution
+Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md
diff --git a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py
index ad8d22e255e..200a82b460b 100644
--- a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py
+++ b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py
@@ -52,7 +52,9 @@ def __init__(
         graph_store: CuGraphStore,
         input_nodes: InputNodes = None,
         batch_size: int = 0,
+        *,
         shuffle: bool = False,
+        drop_last: bool = True,
         edge_types: Sequence[Tuple[str]] = None,
         directory: Union[str, tempfile.TemporaryDirectory] = None,
         input_files: List[str] = None,
@@ -209,26 +211,31 @@ def __init__(
 
         # Truncate if we can't evenly divide the input array
         stop = (len(input_nodes) // batch_size) * batch_size
-        input_nodes = input_nodes[:stop]
+        input_nodes, remainder = cupy.array_split(input_nodes, [stop])
 
         # Split into batches
-        input_nodes = cupy.split(input_nodes, len(input_nodes) // batch_size)
+        input_nodes = cupy.split(input_nodes, max(len(input_nodes) // batch_size, 1))
+
+        if not drop_last:
+            input_nodes.append(remainder)
 
         self.__num_batches = 0
         for batch_num, batch_i in enumerate(input_nodes):
-            self.__num_batches += 1
-            bulk_sampler.add_batches(
-                cudf.DataFrame(
-                    {
-                        "start": batch_i,
-                        "batch": cupy.full(
-                            batch_size, batch_num + starting_batch_id, dtype="int32"
-                        ),
-                    }
-                ),
-                start_col_name="start",
-                batch_col_name="batch",
-            )
+            batch_len = len(batch_i)
+            if batch_len > 0:
+                self.__num_batches += 1
+                bulk_sampler.add_batches(
+                    cudf.DataFrame(
+                        {
+                            "start": batch_i,
+                            "batch": cupy.full(
+                                batch_len, batch_num + starting_batch_id, dtype="int32"
+                            ),
+                        }
+                    ),
+                    start_col_name="start",
+                    batch_col_name="batch",
+                )
 
         bulk_sampler.flush()
         self.__input_files = iter(
diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py
index 27b73bf7d35..9813fa933ee 100644
--- a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py
+++ b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py
@@ -456,6 +456,44 @@ def test_cugraph_loader_e2e_csc(framework: str):
 
 
 @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
+@pytest.mark.parametrize("drop_last", [True, False])
+def test_drop_last(drop_last):
+    N = {"N": 10}
+    G = {
+        ("N", "e", "N"): torch.stack(
+            [torch.tensor([0, 1, 2, 3, 4]), torch.tensor([5, 6, 7, 8, 9])]
+        )
+    }
+    F = FeatureStore(backend="torch")
+    F.add_data(torch.arange(10), "N", "z")
+
+    store = CuGraphStore(F, G, N)
+    with tempfile.TemporaryDirectory() as dir:
+        loader = CuGraphNeighborLoader(
+            (store, store),
+            input_nodes=torch.tensor([0, 1, 2, 3, 4]),
+            num_neighbors=[1],
+            batch_size=2,
+            shuffle=False,
+            drop_last=drop_last,
+            batches_per_partition=1,
+            directory=dir,
+        )
+
+        t = torch.tensor([])
+        for batch in loader:
+            t = torch.concat([t, batch["N"].z])
+
+        t = t.tolist()
+        # Parenthesized so the file count is checked for both drop_last values.
+        files = os.listdir(dir)
+        assert len(files) == (2 if drop_last else 3)
+        assert "batch=0-0.parquet" in files
+        assert "batch=1-1.parquet" in files
+        if not drop_last:
+            assert "batch=2-2.parquet" in files
+
+
 @pytest.mark.parametrize("directory", ["local", "temp"])
 def test_load_directory(
     karate_gnn: Tuple[
diff --git a/python/cugraph/cugraph/structure/__init__.py b/python/cugraph/cugraph/structure/__init__.py
index d7e0ff62358..94f34fd23f3 100644
--- a/python/cugraph/cugraph/structure/__init__.py
+++ b/python/cugraph/cugraph/structure/__init__.py
@@ -25,6 +25,11 @@
 )
 from cugraph.structure.number_map import NumberMap
 from cugraph.structure.symmetrize import symmetrize, symmetrize_df, symmetrize_ddf
+from cugraph.structure.replicate_edgelist import (
+    replicate_edgelist,
+    replicate_cudf_dataframe,
+    replicate_cudf_series,
+)
 from cugraph.structure.convert_matrix import (
     from_edgelist,
     from_cudf_edgelist,
diff --git a/python/cugraph/cugraph/structure/replicate_edgelist.py b/python/cugraph/cugraph/structure/replicate_edgelist.py
new file mode 100644
index 00000000000..d413e50e485
--- /dev/null
+++ b/python/cugraph/cugraph/structure/replicate_edgelist.py
@@ -0,0 +1,351 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import dask_cudf
+import cudf
+from dask.distributed import wait, default_client
+import numpy as np
+from pylibcugraph import (
+    ResourceHandle,
+    replicate_edgelist as pylibcugraph_replicate_edgelist,
+)
+
+from cugraph.dask.common.part_utils import (
+    get_persisted_df_worker_map,
+    persist_dask_df_equal_parts_per_worker,
+)
+
+import dask
+import cupy as cp
+import cugraph.dask.comms.comms as Comms
+from typing import Union, Tuple
+
+
+# FIXME: Convert it to a general-purpose util function
+def _convert_to_cudf(cp_arrays: Tuple[cp.ndarray], col_names: list) -> cudf.DataFrame:
+    """
+    Assemble a cudf DataFrame from the 6-tuple ``cp_arrays``.
+
+    The first five entries are stored under ``col_names[0]`` .. ``col_names[4]``
+    in order: the first two always, the next three only when they are not
+    None. The sixth entry is ignored.
+    """
+    src, dst, wgt, edge_id, edge_type_id, _ = cp_arrays
+    arrays = (src, dst, wgt, edge_id, edge_type_id)
+    gathered = cudf.DataFrame()
+    for position, array in enumerate(arrays):
+        # Positions 0 and 1 (src, dst) are written unconditionally.
+        if position < 2 or array is not None:
+            gathered[col_names[position]] = array
+    return gathered
+
+
+def _call_plc_replicate_edgelist(
+    sID: bytes, edgelist_df: cudf.DataFrame, col_names: list
+) -> cudf.DataFrame:
+    chunk, n_names = edgelist_df[0], len(col_names)
+    gathered_arrays = pylibcugraph_replicate_edgelist(
+        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+        src_array=chunk[col_names[0]],
+        dst_array=chunk[col_names[1]],
+        weight_array=chunk[col_names[2]] if n_names > 2 else None,
+        edge_id_array=chunk[col_names[3]] if n_names > 3 else None,
+        edge_type_id_array=chunk[col_names[4]] if n_names > 4 else None,
+    )
+    return _convert_to_cudf(gathered_arrays, col_names)
+
+
+def _call_plc_replicate_dataframe(sID: bytes, df: cudf.DataFrame) -> cudf.DataFrame:
+    """
+    Replicate ``df[0]`` one column at a time: int32/int64 columns are passed as
+    ``src_array`` and float32/float64 columns as ``weight_array``.
+    """
+    chunk = df[0]
+    replicated = cudf.DataFrame()
+    for name in chunk.columns:
+        column = chunk[name]
+        ints, _, floats, _, _, _ = pylibcugraph_replicate_edgelist(
+            resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+            src_array=column if column.dtype in [np.int32, np.int64] else None,
+            dst_array=None,
+            weight_array=column if column.dtype in [np.float32, np.float64] else None,
+            edge_id_array=None,
+            edge_type_id_array=None,
+        )
+        if ints is not None:
+            replicated[name] = ints
+        elif floats is not None:
+            replicated[name] = floats
+
+    return replicated
+
+
+def _call_plc_replicate_series(sID: bytes, series: cudf.Series) -> cudf.Series:
+    """
+    Replicate ``series[0]``: int32/int64 data is passed as ``src_array`` and
+    float32/float64 data as ``weight_array``.
+    """
+    chunk = series[0]
+    replicated = cudf.Series()
+    ints, _, floats, _, _, _ = pylibcugraph_replicate_edgelist(
+        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+        src_array=chunk if chunk.dtype in [np.int32, np.int64] else None,
+        dst_array=None,
+        weight_array=chunk if chunk.dtype in [np.float32, np.float64] else None,
+        edge_id_array=None,
+        edge_type_id_array=None,
+    )
+    # Prefer the integer result; fall back to the float one, else stay empty.
+    gathered = floats if ints is None else ints
+    return replicated if gathered is None else cudf.Series(gathered)
+
+
+def _mg_call_plc_replicate(
+    client: dask.distributed.client.Client,
+    sID: bytes,
+    dask_object: dict,
+    input_type: str,
+    col_names: list = None,
+) -> Union[dask_cudf.DataFrame, dask_cudf.Series]:
+    """
+    Submit one replicate task per worker and assemble the persisted result.
+
+    Parameters
+    ----------
+    client : client used to submit the tasks.
+    sID : session id forwarded to every task.
+    dask_object : dict mapping each worker to the data passed to its task.
+    input_type : "dataframe", "series" or "edgelist"; selects the task.
+    col_names : column names forwarded to the "edgelist" task only.
+
+    Raises
+    ------
+    ValueError
+        If ``input_type`` is not one of the supported values.
+    """
+    if input_type == "dataframe":
+        func, extra_args = _call_plc_replicate_dataframe, ()
+    elif input_type == "series":
+        func, extra_args = _call_plc_replicate_series, ()
+    elif input_type == "edgelist":
+        func, extra_args = _call_plc_replicate_edgelist, (col_names,)
+    else:
+        raise ValueError(f"Unsupported input_type: {input_type!r}")
+
+    # Every task runs on its own worker only (workers=[w], no other workers).
+    result = [
+        client.submit(
+            func,
+            sID,
+            edata,
+            *extra_args,
+            workers=[w],
+            allow_other_workers=False,
+            pure=False,
+        )
+        for w, edata in dask_object.items()
+    ]
+
+    ddf = dask_cudf.from_delayed(result, verify_meta=False).persist()
+    wait(ddf)
+    wait([r.release() for r in result])
+    return ddf
+
+
+def replicate_edgelist(
+    edgelist_ddf: Union[dask_cudf.DataFrame, cudf.DataFrame] = None,
+    source="src",
+    destination="dst",
+    weight=None,
+    edge_id=None,
+    edge_type=None,
+) -> dask_cudf.DataFrame:
+    """
+    Replicate edges across all GPUs
+
+    Parameters
+    ----------
+    edgelist_ddf: cudf.DataFrame or dask_cudf.DataFrame
+        A DataFrame that contains edge information.
+
+    source : str, optional (default="src")
+        Name of the source column in the input dataframe.
+
+    destination : str, optional (default="dst")
+        Name of the destination column in the input dataframe.
+
+    weight : str, optional (default=None)
+        Name of the weight column in the input dataframe.
+
+    edge_id : str, optional (default=None)
+        Name of the edge id column in the input dataframe.
+
+    edge_type : str, optional (default=None)
+        Name of the edge type column in the input dataframe.
+
+    Returns
+    -------
+    df : dask_cudf.DataFrame
+        A distributed dataframe where each partition contains the
+        combined edgelist from all GPUs. If a cudf.DataFrame was passed
+        as input, the edgelist will be replicated across all the other
+        GPUs in the cluster. If a dask_cudf.DataFrame was passed as input,
+        each partition will be filled with the edges of all partitions
+        in the dask_cudf.DataFrame.
+
+    Raises
+    ------
+    TypeError
+        If 'edgelist_ddf' is neither a cudf nor a dask_cudf DataFrame.
+    ValueError
+        If a requested column name is missing from 'edgelist_ddf'.
+    """
+    _client = default_client()
+
+    if isinstance(edgelist_ddf, cudf.DataFrame):
+        edgelist_ddf = dask_cudf.from_cudf(
+            edgelist_ddf, npartitions=len(Comms.get_workers())
+        )
+    elif not isinstance(edgelist_ddf, dask_cudf.DataFrame):
+        raise TypeError(
+            "The variable 'edgelist_ddf' must be of type "
+            f"'cudf/dask_cudf.dataframe', got type {type(edgelist_ddf)}"
+        )
+
+    # NOTE(review): the per-worker task reads weight/edge_id/edge_type by list
+    # position, so omitting an earlier optional column shifts the later ones.
+    col_names = [source, destination]
+    col_names += [c for c in (weight, edge_id, edge_type) if c is not None]
+
+    if not set(col_names).issubset(set(edgelist_ddf.columns)):
+        raise ValueError(
+            "Invalid column names were provided: valid columns names are "
+            f"{edgelist_ddf.columns}"
+        )
+
+    edgelist_ddf = persist_dask_df_equal_parts_per_worker(edgelist_ddf, _client)
+    edgelist_ddf = get_persisted_df_worker_map(edgelist_ddf, _client)
+
+    return _mg_call_plc_replicate(
+        _client, Comms.get_session_id(), edgelist_ddf, "edgelist", col_names
+    )
+
+
+def replicate_cudf_dataframe(cudf_dataframe):
+    """
+    Replicate dataframe across all GPUs
+
+    Parameters
+    ----------
+    cudf_dataframe: cudf.DataFrame or dask_cudf.DataFrame
+        Dataframe to replicate; every column must be of type 'int32',
+        'int64', 'float32' or 'float64'.
+
+    Returns
+    -------
+    df : dask_cudf.DataFrame
+        A distributed dataframe where each partition contains the
+        combined dataframe from all GPUs. If a cudf.DataFrame was passed
+        as input, the dataframe will be replicated across all the other
+        GPUs in the cluster. If a dask_cudf.DataFrame was passed as input,
+        each partition will be filled with the dataframe of all partitions
+        in the dask_cudf.DataFrame.
+
+    Raises
+    ------
+    TypeError
+        If 'cudf_dataframe' is neither a cudf nor a dask_cudf DataFrame, or
+        if one of its columns has an unsupported type.
+    """
+    # Validate the container type first so that unsupported inputs raise the
+    # documented TypeError instead of failing on the '.dtypes' lookup below.
+    if not isinstance(cudf_dataframe, (cudf.DataFrame, dask_cudf.DataFrame)):
+        raise TypeError(
+            "The variable 'cudf_dataframe' must be of type "
+            f"'cudf/dask_cudf.dataframe', got type {type(cudf_dataframe)}"
+        )
+
+    supported_types = [np.int32, np.int64, np.float32, np.float64]
+    if not all(dtype in supported_types for dtype in cudf_dataframe.dtypes):
+        raise TypeError(
+            "The supported types are 'int32', 'int64', 'float32', 'float64'"
+        )
+
+    _client = default_client()
+
+    df = cudf_dataframe
+    if isinstance(df, cudf.DataFrame):
+        df = dask_cudf.from_cudf(df, npartitions=len(Comms.get_workers()))
+
+    df = persist_dask_df_equal_parts_per_worker(df, _client)
+    df = get_persisted_df_worker_map(df, _client)
+
+    # The "dataframe" task takes no column names, hence the explicit None.
+    return _mg_call_plc_replicate(
+        _client, Comms.get_session_id(), df, "dataframe", None
+    )
+
+
+def replicate_cudf_series(cudf_series):
+    """
+    Replicate series across all GPUs
+
+    Parameters
+    ----------
+    cudf_series: cudf.Series or dask_cudf.Series
+        Series to replicate; its type must be 'int32', 'int64', 'float32'
+        or 'float64'.
+
+    Returns
+    -------
+    series : dask_cudf.Series
+        A distributed series where each partition contains the
+        combined series from all GPUs. If a cudf.Series was passed
+        as input, the Series will be replicated across all the other
+        GPUs in the cluster. If a dask_cudf.Series was passed as input,
+        each partition will be filled with the series of all partitions
+        in the dask_cudf.Series.
+
+    Raises
+    ------
+    TypeError
+        If 'cudf_series' is neither a cudf nor a dask_cudf Series, or if
+        its type is unsupported.
+    """
+    # Validate the container type first so that unsupported inputs raise the
+    # documented TypeError instead of failing on the '.dtype' lookup below.
+    if not isinstance(cudf_series, (cudf.Series, dask_cudf.Series)):
+        raise TypeError(
+            "The variable 'cudf_series' must be of type "
+            f"'cudf/dask_cudf.series', got type {type(cudf_series)}"
+        )
+
+    supported_types = [np.int32, np.int64, np.float32, np.float64]
+    if cudf_series.dtype not in supported_types:
+        raise TypeError(
+            "The supported types are 'int32', 'int64', 'float32', 'float64'"
+        )
+
+    _client = default_client()
+
+    series = cudf_series
+    if isinstance(series, cudf.Series):
+        series = dask_cudf.from_cudf(series, npartitions=len(Comms.get_workers()))
+
+    series = persist_dask_df_equal_parts_per_worker(series, _client)
+    series = get_persisted_df_worker_map(series, _client)
+
+    # The "series" task takes no column names, hence the explicit None.
+    return _mg_call_plc_replicate(
+        _client, Comms.get_session_id(), series, "series", None
+    )
diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
index d93fa3b547d..8d80611a54c 100644
--- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py
@@ -93,7 +93,7 @@ def input_expected_output(input_combo):
     srcs = G.view_edge_list()["0"]
     dsts = G.view_edge_list()["1"]
     vertices = cudf.concat([srcs, dsts]).drop_duplicates()
-    vertices = vertices.sample(num_seeds).astype("int32")
+    vertices = vertices.sample(num_seeds, replace=True).astype("int32")
 
     # print randomly sample n seeds from the graph
     print("\nvertices: \n", vertices)
diff --git a/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py
new file mode 100644
index 00000000000..3bdb5c079ef
--- /dev/null
+++ b/python/cugraph/cugraph/tests/internals/test_replicate_edgelist_mg.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+
+import pytest
+
+import dask_cudf
+import numpy as np
+from cugraph.testing import UNDIRECTED_DATASETS, karate_disjoint
+
+from cugraph.structure.replicate_edgelist import replicate_edgelist
+from cudf.testing.testing import assert_frame_equal
+from pylibcugraph.testing.utils import gen_fixture_params_product
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
+edgeWeightCol = "weights"
+edgeIdCol = "edge_id"
+edgeTypeCol = "edge_type"
+srcCol = "src"
+dstCol = "dst"
+
+
+input_data = UNDIRECTED_DATASETS + [karate_disjoint]
+datasets = [pytest.param(d) for d in input_data]
+
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    ([True, False], "distributed"),
+    ([True, False], "use_weights"),
+    ([True, False], "use_edge_ids"),
+    ([True, False], "use_edge_type_ids"),
+)
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_combo(request):
+    """
+    Return the current combination of params as a dictionary. The keys are
+    listed in the same order as the value lists given to 'fixture_params'.
+    """
+    return dict(
+        zip(
+            (
+                "graph_file",
+                "distributed",
+                "use_weights",
+                "use_edge_ids",
+                "use_edge_type_ids",
+            ),
+            request.param,
+        )
+    )
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+# @pytest.mark.skipif(
+#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
+# )
+@pytest.mark.mg
+def test_mg_replicate_edgelist(dask_client, input_combo):
+    """
+    Check that every partition returned by replicate_edgelist holds the full
+    input edge list, whether the input is a cudf or a dask_cudf DataFrame and
+    for each combination of optional weight / edge id / edge type columns.
+    """
+    df = input_combo["graph_file"].get_edgelist()
+    distributed = input_combo["distributed"]
+
+    columns = [srcCol, dstCol]
+    weight = None
+    edge_id = None
+    edge_type = None
+
+    if input_combo["use_weights"]:
+        df = df.rename(columns={"wgt": edgeWeightCol})
+        columns.append(edgeWeightCol)
+        weight = edgeWeightCol
+    if input_combo["use_edge_ids"]:
+        df = df.reset_index().rename(columns={"index": edgeIdCol})
+        df[edgeIdCol] = df[edgeIdCol].astype(df[srcCol].dtype)
+        columns.append(edgeIdCol)
+        edge_id = edgeIdCol
+    if input_combo["use_edge_type_ids"]:
+        df[edgeTypeCol] = np.random.randint(0, 10, size=len(df))
+        df[edgeTypeCol] = df[edgeTypeCol].astype(df[srcCol].dtype)
+        columns.append(edgeTypeCol)
+        edge_type = edgeTypeCol
+
+    if distributed:
+        # Distribute the edges across all ranks
+        num_workers = len(dask_client.scheduler_info()["workers"])
+        df = dask_cudf.from_cudf(df, npartitions=num_workers)
+    ddf = replicate_edgelist(
+        df[columns], weight=weight, edge_id=edge_id, edge_type=edge_type
+    )
+
+    if distributed:
+        df = df.compute()
+
+    # Sort on every compared column so tied (src, dst) rows cannot reorder, and
+    # build the expected frame once since it is the same for all partitions.
+    expected_df = df[columns].sort_values(columns).reset_index(drop=True)
+
+    for i in range(ddf.npartitions):
+        result_df = ddf.get_partition(i).compute()
+        result_df = result_df.sort_values(columns).reset_index(drop=True)
+
+        assert_frame_equal(expected_df, result_df, check_dtype=False, check_like=True)
diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml
index 319900b3de3..bd426291c8d 100644
--- a/python/cugraph/pyproject.toml
+++ b/python/cugraph/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
     "dask-cudf==23.12.*",
     "fsspec[http]>=0.6.0",
     "numba>=0.57",
+    "numpy>=1.21",
     "pylibcugraph==23.12.*",
     "raft-dask==23.12.*",
     "rapids-dask-dependency==23.12.*",
diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py
index 26638d1e735..910db1bc379 100644
--- a/python/nx-cugraph/_nx_cugraph/__init__.py
+++ b/python/nx-cugraph/_nx_cugraph/__init__.py
@@ -79,6 +79,8 @@
         "path_graph",
         "petersen_graph",
         "sedgewick_maze_graph",
+        "single_source_shortest_path_length",
+        "single_target_shortest_path_length",
         "star_graph",
         "tadpole_graph",
         "tetrahedral_graph",
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py
index 87b1967fa93..32cd6f31a47 100644
--- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py
+++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py
@@ -10,9 +10,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import bipartite, centrality, community, components
+from . import bipartite, centrality, community, components, shortest_paths
 from .bipartite import complete_bipartite_graph
 from .centrality import *
 from .components import *
 from .core import *
 from .isolate import *
+from .shortest_paths import *
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/__init__.py
new file mode 100644
index 00000000000..b7d6b742176
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .unweighted import *
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py
new file mode 100644
index 00000000000..3413a637b32
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cupy as cp
+import networkx as nx
+import numpy as np
+import pylibcugraph as plc
+
+from nx_cugraph.convert import _to_graph
+from nx_cugraph.utils import index_dtype, networkx_algorithm
+
+__all__ = ["single_source_shortest_path_length", "single_target_shortest_path_length"]
+
+
+@networkx_algorithm
+def single_source_shortest_path_length(G, source, cutoff=None):
+    return _single_shortest_path_length(G, source, cutoff, "Source")
+
+
+@networkx_algorithm
+def single_target_shortest_path_length(G, target, cutoff=None):
+    return _single_shortest_path_length(G, target, cutoff, "Target")
+
+
+def _single_shortest_path_length(G, source, cutoff, kind):
+    # kind ("Source"/"Target") labels the error and selects switched indices.
+    G = _to_graph(G)
+    if source not in G:
+        raise nx.NodeNotFound(f"{kind} {source} is not in G")
+    if G.src_indices.size == 0:
+        return {source: 0}
+    src_index = source if G.key_to_id is None else G.key_to_id[source]
+    distances, _, node_ids = plc.bfs(
+        handle=plc.ResourceHandle(),
+        graph=G._get_plc_graph(switch_indices=kind == "Target"),
+        sources=cp.array([src_index], index_dtype),
+        direction_optimizing=False,  # True for undirected only; what's recommended?
+        depth_limit=-1 if cutoff is None else cutoff,
+        compute_predecessors=False,
+        do_expensive_check=False,
+    )
+    # Nodes still at the dtype's maximum value are dropped from the result.
+    reached = distances != np.iinfo(distances.dtype).max
+    return G._nodearrays_to_dict(node_ids[reached], distances[reached])
diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py
index 23004651fc5..fea318e036e 100644
--- a/python/nx-cugraph/nx_cugraph/classes/graph.py
+++ b/python/nx-cugraph/nx_cugraph/classes/graph.py
@@ -559,6 +559,7 @@ def _get_plc_graph(
         edge_dtype: Dtype | None = None,
         *,
         store_transposed: bool = False,
+        switch_indices: bool = False,
         edge_array: cp.ndarray[EdgeValue] | None = None,
     ):
         if edge_array is not None:
@@ -613,14 +614,18 @@ def _get_plc_graph(
             elif edge_array.dtype not in self._plc_allowed_edge_types:
                 raise TypeError(edge_array.dtype)
         # Should we cache PLC graph?
+        src_indices = self.src_indices
+        dst_indices = self.dst_indices
+        if switch_indices:
+            src_indices, dst_indices = dst_indices, src_indices
         return plc.SGGraph(
             resource_handle=plc.ResourceHandle(),
             graph_properties=plc.GraphProperties(
                 is_multigraph=self.is_multigraph(),
                 is_symmetric=not self.is_directed(),
             ),
-            src_or_offset_array=self.src_indices,
-            dst_or_index_array=self.dst_indices,
+            src_or_offset_array=src_indices,
+            dst_or_index_array=dst_indices,
             weight_array=edge_array,
             store_transposed=store_transposed,
             renumber=False,
diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py
index 875f8621021..8903fdc541e 100644
--- a/python/nx-cugraph/nx_cugraph/interface.py
+++ b/python/nx-cugraph/nx_cugraph/interface.py
@@ -224,9 +224,11 @@ def key(testpath):
                     )
 
         too_slow = "Too slow to run"
+        maybe_oom = "out of memory in CI"
         skip = {
             key("test_tree_isomorphism.py:test_positive"): too_slow,
             key("test_tree_isomorphism.py:test_negative"): too_slow,
+            key("test_efficiency.py:TestEfficiency.test_using_ego_graph"): maybe_oom,
         }
 
         for item in items:
diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
index 6618c50122c..c2e22fc1ff7 100644
--- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
@@ -56,6 +56,7 @@ set(cython_sources
     uniform_random_walks.pyx
     utils.pyx
     weakly_connected_components.pyx
+    replicate_edgelist.pyx
 )
 set(linked_libraries cugraph::cugraph;cugraph::cugraph_c)
 
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index 30f1c2d0fb1..1d02498ea30 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -87,6 +87,8 @@
 
 from pylibcugraph.generate_rmat_edgelists import generate_rmat_edgelists
 
+from pylibcugraph.replicate_edgelist import replicate_edgelist
+
 from pylibcugraph.k_truss_subgraph import k_truss_subgraph
 
 from pylibcugraph.jaccard_coefficients import jaccard_coefficients
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
index f18e9848182..8b3a629956c 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -138,6 +138,16 @@ cdef extern from "cugraph_c/graph_functions.h":
             cugraph_induced_subgraph_result_t* induced_subgraph
         )
     
+    cdef cugraph_type_erased_device_array_view_t* \
+        cugraph_induced_subgraph_get_edge_ids(
+            cugraph_induced_subgraph_result_t* induced_subgraph
+        )
+    
+    cdef cugraph_type_erased_device_array_view_t* \
+        cugraph_induced_subgraph_get_edge_type_ids(
+            cugraph_induced_subgraph_result_t* induced_subgraph
+        )
+    
     cdef cugraph_type_erased_device_array_view_t* \
         cugraph_induced_subgraph_get_subgraph_offsets(
             cugraph_induced_subgraph_result_t* induced_subgraph
@@ -158,3 +168,17 @@ cdef extern from "cugraph_c/graph_functions.h":
             cugraph_induced_subgraph_result_t** result,
             cugraph_error_t** error
         )
+
+    ###########################################################################
+    # allgather
+    cdef cugraph_error_code_t \
+        cugraph_allgather(
+            const cugraph_resource_handle_t* handle,
+            const cugraph_type_erased_device_array_view_t* src,
+            const cugraph_type_erased_device_array_view_t* dst,
+            const cugraph_type_erased_device_array_view_t* weights,
+            const cugraph_type_erased_device_array_view_t* edge_ids,
+            const cugraph_type_erased_device_array_view_t* edge_type_ids,
+            cugraph_induced_subgraph_result_t** result,
+            cugraph_error_t** error
+        )
diff --git a/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx b/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx
new file mode 100644
index 00000000000..3763d4bc69d
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx
@@ -0,0 +1,202 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.array cimport (
+    cugraph_type_erased_device_array_view_t,
+    cugraph_type_erased_device_array_view_free,
+)
+from pylibcugraph._cugraph_c.graph_functions cimport (
+    cugraph_allgather,
+    cugraph_induced_subgraph_result_t,
+    cugraph_induced_subgraph_get_sources,
+    cugraph_induced_subgraph_get_destinations,
+    cugraph_induced_subgraph_get_edge_weights,
+    cugraph_induced_subgraph_get_edge_ids,
+    cugraph_induced_subgraph_get_edge_type_ids,
+    cugraph_induced_subgraph_get_subgraph_offsets,
+    cugraph_induced_subgraph_result_free,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.utils cimport (
+    assert_success,
+    assert_CAI_type,
+    copy_to_cupy_array,
+    create_cugraph_type_erased_device_array_view_from_py_obj
+)
+
+
+def replicate_edgelist(ResourceHandle resource_handle,
+                       src_array,
+                       dst_array,
+                       weight_array,
+                       edge_id_array,
+                       edge_type_id_array):
+    """
+        Replicate edges across all GPUs
+
+        Each array argument may be None; the corresponding entry of the returned
+        tuple is then None as well.
+
+        Parameters
+        ----------
+        resource_handle : ResourceHandle
+            Handle to the underlying device resources needed for referencing data
+            and running algorithms.
+
+        src_array : device array type, optional
+            Device array containing the vertex identifiers of the source of each
+            directed edge. The order of the array corresponds to the ordering of the
+            dst_array, where the ith item in src_array and the ith item in dst_array
+            define the ith edge of the graph.
+
+        dst_array : device array type, optional
+            Device array containing the vertex identifiers of the destination of
+            each directed edge. The order of the array corresponds to the ordering
+            of the src_array, where the ith item in src_array and the ith item in
+            dst_array define the ith edge of the graph.
+
+        weight_array : device array type, optional
+            Device array containing the weight values of each directed edge. The
+            order of the array corresponds to the ordering of the src_array and
+            dst_array arrays, where the ith item in weight_array is the weight value
+            of the ith edge of the graph.
+
+        edge_id_array : device array type, optional
+            Device array containing the edge id values of each directed edge. The
+            order of the array corresponds to the ordering of the src_array and
+            dst_array arrays, where the ith item in edge_id_array is the id value
+            of the ith edge of the graph.
+
+        edge_type_id_array : device array type, optional
+            Device array containing the edge type id values of each directed edge. The
+            order of the array corresponds to the ordering of the src_array and
+            dst_array arrays, where the ith item in edge_type_id_array is the type id
+            value of the ith edge of the graph.
+
+        Returns
+        -------
+        A tuple of six items: cupy arrays of 'src', 'dst', 'weight', 'edge_id'
+        and 'edge_type_id' (each None when the matching input array is None),
+        followed by a cupy array copied from the result's subgraph offsets.
+    """
+    assert_CAI_type(src_array, "src_array", True)
+    assert_CAI_type(dst_array, "dst_array", True)
+    assert_CAI_type(weight_array, "weight_array", True)
+    assert_CAI_type(edge_id_array, "edge_id_array", True)
+    assert_CAI_type(edge_type_id_array, "edge_type_id_array", True)
+
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+        resource_handle.c_resource_handle_ptr
+
+    # Start from NULL so the cleanup below can tell whether cugraph_allgather
+    # handed back a result that needs to be freed.
+    cdef cugraph_induced_subgraph_result_t* result_ptr = NULL
+    cdef cugraph_error_code_t error_code
+    cdef cugraph_error_t* error_ptr
+
+    cdef cugraph_type_erased_device_array_view_t* srcs_view_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(src_array)
+
+    cdef cugraph_type_erased_device_array_view_t* dsts_view_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(dst_array)
+
+    cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(weight_array)
+
+    cdef cugraph_type_erased_device_array_view_t* edge_ids_view_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(edge_id_array)
+
+    cdef cugraph_type_erased_device_array_view_t* edge_type_ids_view_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(edge_type_id_array)
+
+    cupy_sources = None
+    cupy_destinations = None
+    cupy_edge_weights = None
+    cupy_edge_ids = None
+    cupy_edge_type_ids = None
+
+    # Everything that can raise runs inside try/finally so that the result and
+    # the input views are released on the error path too.
+    try:
+        error_code = cugraph_allgather(c_resource_handle_ptr,
+                                       srcs_view_ptr,
+                                       dsts_view_ptr,
+                                       weights_view_ptr,
+                                       edge_ids_view_ptr,
+                                       edge_type_ids_view_ptr,
+                                       &result_ptr,
+                                       &error_ptr)
+        assert_success(error_code, error_ptr, "replicate_edgelist")
+
+        # Copy each requested device array out of the result into a cupy array.
+        # FIXME: Get ownership of the result data instead of performing a copy
+        # for performance improvement
+        if src_array is not None:
+            cupy_sources = copy_to_cupy_array(
+                c_resource_handle_ptr,
+                cugraph_induced_subgraph_get_sources(result_ptr))
+
+        if dst_array is not None:
+            cupy_destinations = copy_to_cupy_array(
+                c_resource_handle_ptr,
+                cugraph_induced_subgraph_get_destinations(result_ptr))
+
+        if weight_array is not None:
+            cupy_edge_weights = copy_to_cupy_array(
+                c_resource_handle_ptr,
+                cugraph_induced_subgraph_get_edge_weights(result_ptr))
+
+        if edge_id_array is not None:
+            cupy_edge_ids = copy_to_cupy_array(
+                c_resource_handle_ptr,
+                cugraph_induced_subgraph_get_edge_ids(result_ptr))
+
+        if edge_type_id_array is not None:
+            cupy_edge_type_ids = copy_to_cupy_array(
+                c_resource_handle_ptr,
+                cugraph_induced_subgraph_get_edge_type_ids(result_ptr))
+
+        cupy_subgraph_offsets = copy_to_cupy_array(
+            c_resource_handle_ptr,
+            cugraph_induced_subgraph_get_subgraph_offsets(result_ptr))
+    finally:
+        # Free the C result and the views created over the input arrays.
+        if result_ptr != NULL:
+            cugraph_induced_subgraph_result_free(result_ptr)
+        if src_array is not None:
+            cugraph_type_erased_device_array_view_free(srcs_view_ptr)
+        if dst_array is not None:
+            cugraph_type_erased_device_array_view_free(dsts_view_ptr)
+        if weight_array is not None:
+            cugraph_type_erased_device_array_view_free(weights_view_ptr)
+        if edge_id_array is not None:
+            cugraph_type_erased_device_array_view_free(edge_ids_view_ptr)
+        if edge_type_id_array is not None:
+            cugraph_type_erased_device_array_view_free(edge_type_ids_view_ptr)
+
+    return (cupy_sources, cupy_destinations,
+            cupy_edge_weights, cupy_edge_ids,
+            cupy_edge_type_ids, cupy_subgraph_offsets)