-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: After the submodule update, the FA3 CUTLASS kernels now require much more memory to build and will exhaust the memory of the CI machine. Make the following changes: 1. Build FA3 with MAX_JOBS=8 and NVCC_THREADS=1. 2. Disable the xformers build, as it also requires FA3; we will try to enable its build later. 3. Also disable the colfax and TK builds for now - will fix that later. Pull Request resolved: #120 Test Plan: CI Reviewed By: adamomainz Differential Revision: D67524390 Pulled By: xuzhao9 fbshipit-source-id: fc889fae5d51d7d2d974d2996e33e3f31d8db98e
- Loading branch information
1 parent
06c28ed
commit 5cc3976
Showing
5 changed files
with
76 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule ThunderKittens
updated
383 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
diff --git a/hopper/setup.py b/hopper/setup.py | ||
index f9f3cfd..132ce07 100644 | ||
--- a/hopper/setup.py | ||
+++ b/hopper/setup.py | ||
@@ -78,7 +78,8 @@ def check_if_cuda_home_none(global_option: str) -> None: | ||
|
||
|
||
def append_nvcc_threads(nvcc_extra_args): | ||
- return nvcc_extra_args + ["--threads", "4"] | ||
+ nvcc_threads = os.getenv("NVCC_THREADS") or "4" | ||
+ return nvcc_extra_args + ["--threads", nvcc_threads] | ||
|
||
|
||
cmdclass = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import os | ||
import subprocess | ||
import sys | ||
|
||
from pathlib import Path | ||
|
||
# Directory containing this script; the patch files are looked up here.
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
# Two levels above this script's directory (i.e. three levels above the file).
# The functions below resolve "submodules/flash-attention" relative to it.
REPO_PATH = Path(CUR_DIR).parent.parent
|
||
|
||
def patch_fa3():
    """Apply the local patch files to the flash-attention submodule.

    Each patch listed in ``patches`` is read from the directory containing
    this script and applied with ``patch -p1 --forward`` from the root of
    ``submodules/flash-attention``. Rejected hunks are written to
    ``/tmp/rej``.

    A patch whose output contains "previously applied" is treated as already
    done and skipped, so the function can be re-run on a patched checkout.
    Any other non-zero exit from ``patch`` prints its captured output and
    terminates the process with exit status 1.
    """
    patches = ["hopper.patch"]
    # The target directory is the same for every patch, so compute it once.
    submodule_path = str(
        REPO_PATH.joinpath("submodules", "flash-attention").absolute()
    )
    for patch_file in patches:
        patch_file_path = os.path.join(CUR_DIR, patch_file)
        try:
            subprocess.check_output(
                [
                    "patch",
                    "-p1",
                    "--forward",
                    "-i",
                    patch_file_path,
                    "-r",
                    "/tmp/rej",
                ],
                cwd=submodule_path,
            )
        except subprocess.CalledProcessError as e:
            # Only CalledProcessError carries the captured output; decode it
            # so the message is readable text rather than a bytes repr.
            output_str = e.output.decode(errors="replace") if e.output else ""
            if "previously applied" in output_str:
                # Already patched: move on to the next patch instead of
                # abandoning the remaining ones.
                continue
            print(output_str)
            sys.exit(1)
|
||
|
||
def install_fa3():
    """Patch the flash-attention submodule and install its ``hopper`` package.

    Runs ``patch_fa3()`` first, then performs an editable pip install from
    ``submodules/flash-attention/hopper`` using a copy of the current
    environment with ``MAX_JOBS`` and ``NVCC_THREADS`` overridden.

    Raises:
        subprocess.CalledProcessError: if the pip install exits non-zero.
    """
    patch_fa3()
    fa3_path = REPO_PATH.joinpath("submodules", "flash-attention", "hopper")
    env = os.environ.copy()
    # limit nvcc memory usage on the CI machine
    # NOTE(review): MAX_JOBS presumably caps the number of parallel compile
    # jobs in the extension build — confirm against the build backend.
    env["MAX_JOBS"] = "8"
    # Read by the patched hopper/setup.py to set nvcc's --threads value.
    env["NVCC_THREADS"] = "1"
    # Use the running interpreter's pip so the package is installed into the
    # same environment as this script, not whichever `pip` is first on PATH.
    cmd = [sys.executable, "-m", "pip", "install", "-e", "."]
    subprocess.check_call(cmd, cwd=str(fa3_path.resolve()), env=env)