Skip to content
This repository has been archived by the owner on Nov 25, 2024. It is now read-only.

[RELEASE] wholegraph v24.02 #125

Merged
merged 24 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
d589258
v24.02 Updates [skip ci]
raydouglass Nov 9, 2023
6566b63
Merge pull request #93 from rapidsai/branch-23.12
GPUtester Nov 17, 2023
bf6a742
Merge pull request #94 from rapidsai/branch-23.12
GPUtester Nov 17, 2023
3897214
Merge pull request #95 from rapidsai/branch-23.12
GPUtester Nov 17, 2023
b1b266d
Merge pull request #98 from rapidsai/branch-23.12
GPUtester Nov 21, 2023
77676ea
Merge branch-23.12 into branch-24.02
bdice Nov 27, 2023
9853c62
Fix whitespace.
bdice Nov 27, 2023
0586438
Merge pull request #102 from bdice/branch-24.02-merge-23.12
raydouglass Nov 27, 2023
fbc7951
Merge pull request #104 from rapidsai/branch-23.12
GPUtester Nov 28, 2023
0c61783
Merge pull request #109 from rapidsai/branch-23.12
GPUtester Dec 6, 2023
c43f6d1
Align versions for cudnn, clang-tools, cython, and doxygen with the r…
bdice Dec 18, 2023
ef5b3ee
Don't overwrite wholegraph_ROOT if provided (#114)
vyasr Dec 19, 2023
aaceac5
added Direct IO support for WholeMemory loading (#113)
dongxuy04 Jan 9, 2024
7025eaf
refactor CUDA versions in dependencies.yaml (#115)
jameslamb Jan 11, 2024
0ddab62
Remove usages of rapids-env-update (#117)
KyleFromNVIDIA Jan 12, 2024
4a92d47
fix inferencesample option (#107)
chuangz0 Jan 19, 2024
ec609ab
fix a bug for embedding optimizer, which leads to undefined behavior …
linhu-nv Jan 19, 2024
c592185
Reset WholeGraph communicators during the finalize call (#111)
chang-l Jan 22, 2024
8cb7c5c
Fix pip dependencies (#118)
trxcllnt Jan 22, 2024
503cdcd
Exclude tests from builds (#127)
vyasr Feb 7, 2024
58602ed
Revert "Exclude tests from builds (#127)" (#130)
raydouglass Feb 8, 2024
40cc75d
Logging level (#123)
linhu-nv Feb 12, 2024
b49be9d
allow users to control gather/scatter sms (#124)
linhu-nv Feb 12, 2024
cecd3ff
Update Changelog [skip ci]
raydouglass Feb 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ concurrency:
jobs:
cpp-build:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -38,7 +38,7 @@ jobs:
python-build:
needs: [cpp-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -49,7 +49,7 @@ jobs:
if: github.ref_type == 'branch'
needs: [python-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02
with:
arch: "amd64"
branch: ${{ inputs.branch }}
Expand All @@ -62,15 +62,15 @@ jobs:
upload-conda:
needs: [cpp-build, python-build]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
wheel-build-pylibwholegraph:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand All @@ -80,7 +80,7 @@ jobs:
wheel-publish-pylibwholegraph:
needs: wheel-build-pylibwholegraph
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
Expand Down
18 changes: 9 additions & 9 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,41 +21,41 @@ jobs:
- wheel-build-pylibwholegraph
- wheel-test-pylibwholegraph
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02
with:
enable_check_generated_files: false
conda-cpp-build:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02
with:
build_type: pull-request
node_type: cpu16
conda-cpp-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02
with:
build_type: pull-request
conda-python-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02
with:
build_type: pull-request
conda-python-tests:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02
with:
build_type: pull-request
docs-build:
needs: conda-python-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02
with:
build_type: pull-request
arch: "amd64"
Expand All @@ -64,14 +64,14 @@ jobs:
wheel-build-pylibwholegraph:
needs: checks
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02
with:
build_type: pull-request
script: ci/build_wheel.sh
wheel-test-pylibwholegraph:
needs: wheel-build-pylibwholegraph
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02
with:
build_type: pull-request
script: ci/test_wheel.sh
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,23 @@ on:
jobs:
conda-cpp-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
conda-pytorch-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
wheel-tests-pylibwholegraph:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02
with:
build_type: nightly
branch: ${{ inputs.branch }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,4 @@ cpp/.idea/
cpp/cmake-build-debug/
pylibwholegraph/.idea/
pylibwholegraph/cmake-build-debug/
compile_commands.json
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ repos:
pass_filenames: false
additional_dependencies: [gitpython]
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.5.1
rev: v1.8.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
25 changes: 25 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,28 @@
# wholegraph 24.02.00 (12 Feb 2024)

## 🐛 Bug Fixes

- Revert "Exclude tests from builds (#127)" ([#130](https://github.com/rapidsai/wholegraph/pull/130)) [@raydouglass](https://github.com/raydouglass)
- Exclude tests from builds ([#127](https://github.com/rapidsai/wholegraph/pull/127)) [@vyasr](https://github.com/vyasr)
- fix a bug for embedding optimizer, which leads to undefined behavior ([#108](https://github.com/rapidsai/wholegraph/pull/108)) [@linhu-nv](https://github.com/linhu-nv)
- fix inferencesample option ([#107](https://github.com/rapidsai/wholegraph/pull/107)) [@chuangz0](https://github.com/chuangz0)

## 🚀 New Features

- allow users to control gather/scatter sms ([#124](https://github.com/rapidsai/wholegraph/pull/124)) [@linhu-nv](https://github.com/linhu-nv)

## 🛠️ Improvements

- Logging level ([#123](https://github.com/rapidsai/wholegraph/pull/123)) [@linhu-nv](https://github.com/linhu-nv)
- Fix pip dependencies ([#118](https://github.com/rapidsai/wholegraph/pull/118)) [@trxcllnt](https://github.com/trxcllnt)
- Remove usages of rapids-env-update ([#117](https://github.com/rapidsai/wholegraph/pull/117)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
- refactor CUDA versions in dependencies.yaml ([#115](https://github.com/rapidsai/wholegraph/pull/115)) [@jameslamb](https://github.com/jameslamb)
- Don't overwrite wholegraph_ROOT if provided ([#114](https://github.com/rapidsai/wholegraph/pull/114)) [@vyasr](https://github.com/vyasr)
- added Direct IO support for WholeMemory loading ([#113](https://github.com/rapidsai/wholegraph/pull/113)) [@dongxuy04](https://github.com/dongxuy04)
- Align versions for cudnn, clang-tools, cython, and doxygen with the rest of RAPIDS. ([#112](https://github.com/rapidsai/wholegraph/pull/112)) [@bdice](https://github.com/bdice)
- Reset WholeGraph communicators during the finalize call ([#111](https://github.com/rapidsai/wholegraph/pull/111)) [@chang-l](https://github.com/chang-l)
- Forward-merge branch-23.12 to branch-24.02 ([#102](https://github.com/rapidsai/wholegraph/pull/102)) [@bdice](https://github.com/bdice)

# wholegraph 23.12.00 (6 Dec 2023)

## 🐛 Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
23.12.00
24.02.00
6 changes: 5 additions & 1 deletion ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

set -euo pipefail

source rapids-env-update
rapids-configure-conda-channels

source rapids-configure-sccache

source rapids-date-string

export CMAKE_GENERATOR=Ninja

Expand Down
2 changes: 1 addition & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ rapids-print-env
rapids-logger "Downloading artifacts from previous jobs"

CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
export RAPIDS_VERSION_NUMBER="23.12"
export RAPIDS_VERSION_NUMBER="24.02"
export RAPIDS_DOCS_DIR="$(mktemp -d)"

rapids-mamba-retry install \
Expand Down
6 changes: 5 additions & 1 deletion ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

set -euo pipefail

source rapids-env-update
rapids-configure-conda-channels

source rapids-configure-sccache

source rapids-date-string

export CMAKE_GENERATOR=Ninja

Expand Down
17 changes: 9 additions & 8 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,23 @@ channels:
dependencies:
- breathe
- c-compiler
- clang-tools=16.0.0
- clangxx=16.0.0
- clang-tools==16.0.6
- clangxx==16.0.6
- cmake>=3.26.4
- cuda-nvtx=11.8
- cudatoolkit=11.8
- cudnn=8.4
- cuda-version=11.8
- cudatoolkit
- cudnn=8.8
- cxx-compiler
- cython
- doxygen=1.8.20
- cython>=3.0.0
- doxygen==1.9.1
- gcc_linux-64=11.*
- gitpython
- graphviz
- ipykernel
- ipython
- libraft-headers==23.12.*
- librmm==23.12.*
- libraft-headers==24.2.*
- librmm==24.2.*
- nanobind>=0.2.0
- nbsphinx
- nccl
Expand Down
14 changes: 7 additions & 7 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,24 @@ channels:
dependencies:
- breathe
- c-compiler
- clang-tools=16.0.0
- clangxx=16.0.0
- clang-tools==16.0.6
- clangxx==16.0.6
- cmake>=3.26.4
- cuda-cudart-dev
- cuda-nvcc
- cuda-nvtx
- cuda-version=12.0
- cudnn=8.4
- cudnn=8.8
- cxx-compiler
- cython
- doxygen=1.8.20
- cython>=3.0.0
- doxygen==1.9.1
- gcc_linux-64=11.*
- gitpython
- graphviz
- ipykernel
- ipython
- libraft-headers==23.12.*
- librmm==23.12.*
- libraft-headers==24.2.*
- librmm==24.2.*
- nanobind>=0.2.0
- nbsphinx
- nccl
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#=============================================================================

set(RAPIDS_VERSION "23.12")
set(RAPIDS_VERSION "24.02")
set(WHOLEGRAPH_VERSION "${RAPIDS_VERSION}.00")

cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
Expand Down
2 changes: 1 addition & 1 deletion cpp/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PROJECT_NAME = "WholeGraph C API"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 23.12
PROJECT_NUMBER = 24.02

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_raft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ endfunction()
# CPM_raft_SOURCE=/path/to/local/raft
find_and_configure_raft(VERSION ${WHOLEGRAPH_MIN_VERSION_raft}
FORK rapidsai
PINNED_TAG branch-${WHOLEGRAPH_BRANCH_VERSION_raft}
PINNED_TAG branch-${WHOLEGRAPH_BRANCH_VERSION_raft}

# When PINNED_TAG above doesn't match wholegraph,
# force local raft clone in build directory
Expand Down
4 changes: 3 additions & 1 deletion cpp/include/wholememory/embedding.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ wholememory_error_code_t wholememory_destroy_embedding_cache_policy(
* @param memory_location : Memory Location of the underlying WholeMemory
* @param optimizer : Optimizer to use for training; use nullptr if the embedding is not trained
* @param cache_policy : Cache policy for this embedding; use nullptr if no cache is used
* @param user_defined_sms : User-defined number of SMs to use for raw embedding gather/scatter
* @return : wholememory_error_code_t
*/
wholememory_error_code_t wholememory_create_embedding(
Expand All @@ -139,7 +140,8 @@ wholememory_error_code_t wholememory_create_embedding(
wholememory_memory_type_t memory_type,
wholememory_memory_location_t memory_location,
wholememory_embedding_optimizer_t optimizer,
wholememory_embedding_cache_policy_t cache_policy);
wholememory_embedding_cache_policy_t cache_policy,
int user_defined_sms = -1);

/**
* Destroy WholeMemory Embedding
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/wholememory/wholememory.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,10 @@ enum wholememory_distributed_backend_t {
/**
* Initialize WholeMemory library
* @param flags : reserved should be 0
* @param wm_log_level : WholeMemory log level; the default level is "info"
* @return : wholememory_error_code_t
*/
wholememory_error_code_t wholememory_init(unsigned int flags);
wholememory_error_code_t wholememory_init(unsigned int flags, unsigned int wm_log_level = 3);

/**
* Finalize WholeMemory library
Expand Down
8 changes: 6 additions & 2 deletions cpp/include/wholememory/wholememory_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ extern "C" {
* @param output_tensor : output tensor to gather to, should NOT be WholeMemoryTensor
* @param p_env_fns : pointers to environment functions.
* @param stream : cudaStream_t to use.
* @param gather_sms : the number of streaming multiprocessors (SMs) used in the gather kernel
* @return : wholememory_error_code_t
*/
wholememory_error_code_t wholememory_gather(wholememory_tensor_t wholememory_tensor,
wholememory_tensor_t indices_tensor,
wholememory_tensor_t output_tensor,
wholememory_env_func_t* p_env_fns,
void* stream);
void* stream,
int gather_sms = -1);

/**
* Scatter Op
Expand All @@ -45,13 +47,15 @@ wholememory_error_code_t wholememory_gather(wholememory_tensor_t wholememory_ten
* @param wholememory_tensor : WholeMemory Tensor of embedding table.
* @param p_env_fns : pointers to environment functions.
* @param stream : cudaStream_t to use.
* @param scatter_sms : the number of streaming multiprocessors (SMs) used in the scatter kernel
* @return : wholememory_error_code_t
*/
wholememory_error_code_t wholememory_scatter(wholememory_tensor_t input_tensor,
wholememory_tensor_t indices_tensor,
wholememory_tensor_t wholememory_tensor,
wholememory_env_func_t* p_env_fns,
void* stream);
void* stream,
int scatter_sms = -1);

/**
* Just a test function,
Expand Down
Loading
Loading