Skip to content

Commit

Permalink
[tt-train] Fix tt-train in main branch (#15232)
Browse the repository at this point in the history
* Fix path for tokenizer.
* Change location of data folder from tt-train/build/data to tt-train/data.
* Remove the `!data/` negation entry from tt-train/.gitignore.
* Revert change to the Shakespeare dataset download URL.
  • Loading branch information
rfurko-tt authored Nov 19, 2024
1 parent f84fab5 commit 697ccc7
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-artifact.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ jobs:
cat build/ccache.stats >> $GITHUB_STEP_SUMMARY
echo '```' >> $GITHUB_STEP_SUMMARY
- name: 'Tar files'
run: tar -cvhf ttm_${{ matrix.arch }}.tar ttnn/ttnn/*.so build/lib ttnn/ttnn/*.so build/programming_examples build/test build/tools build/tt-train build/data runtime
run: tar -cvhf ttm_${{ matrix.arch }}.tar ttnn/ttnn/*.so build/lib ttnn/ttnn/*.so build/programming_examples build/test build/tools build/tt-train data runtime
- name: 'Upload Artifact'
uses: actions/upload-artifact@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tt-train-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
ARCH_NAME: ${{ inputs.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
TEST_DATA_DIR: ${{ github.workspace }}/build/data
TEST_DATA_DIR: ${{ github.workspace }}/data
runs-on:
- ${{ inputs.runner-label }}
- cloud-virtual-machine
Expand Down
2 changes: 0 additions & 2 deletions tt-train/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,3 @@ venv/
wandb/

cluster_descriptor.yaml

!data/
6 changes: 3 additions & 3 deletions tt-train/sources/examples/nano_gpt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ set(SOURCES
add_executable(nano_gpt ${SOURCES})
target_link_libraries(nano_gpt PRIVATE ttml)

add_definitions(-DDATA_FOLDER="${CMAKE_BINARY_DIR}/data")
add_definitions(-DDATA_FOLDER="${CMAKE_SOURCE_DIR}/data")

# Define the target file location
set(SHAKESPEARE_URL "https://www.cs.princeton.edu/courses/archive/spring20/cos302/files/shakespeare.txt")
set(SHAKESPEARE_FILE "${CMAKE_BINARY_DIR}/data/shakespeare.txt")
set(SHAKESPEARE_URL "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt")
set(SHAKESPEARE_FILE "${CMAKE_SOURCE_DIR}/data/shakespeare.txt")

# Check if the file already exists before downloading
if(NOT EXISTS "${SHAKESPEARE_FILE}")
Expand Down
2 changes: 1 addition & 1 deletion tt-train/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ add_definitions(-DTEST_DATA_DIR="${CMAKE_SOURCE_DIR}/data")

# Define the target file location
set(TOKENIZER_URL "https://huggingface.co/togethercomputer/RedPajama-INCITE-Chat-3B-v1/resolve/main/tokenizer.json")
set(TOKENIZER_FILE "${CMAKE_BINARY_DIR}/data/tokenizer.json")
set(TOKENIZER_FILE "${CMAKE_SOURCE_DIR}/data/tokenizer.json")

# Check if the file already exists before downloading
if(NOT EXISTS "${TOKENIZER_FILE}")
Expand Down

0 comments on commit 697ccc7

Please sign in to comment.