diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 54e663ebe3..8b7b3dae6a 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -44,6 +44,7 @@ def test_load_hub(self):
             prepared_path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 1024,
                     "datasets": [
                         {
@@ -80,6 +81,7 @@ def test_load_local_hub(self):
             # how to load it.
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 1024,
                     "datasets": [
                         {
@@ -113,6 +115,7 @@ def test_load_from_save_to_disk(self):
             prepared_path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 256,
                     "datasets": [
                         {
@@ -143,6 +146,7 @@ def test_load_from_dir_of_parquet(self):
             prepared_path: Path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 256,
                     "datasets": [
                         {
@@ -178,6 +182,7 @@ def test_load_from_dir_of_json(self):
             prepared_path: Path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 256,
                     "datasets": [
                         {
@@ -211,6 +216,7 @@ def test_load_from_single_parquet(self):
             prepared_path: Path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 256,
                     "datasets": [
                         {
@@ -240,6 +246,7 @@ def test_load_from_single_json(self):
             prepared_path: Path = Path(tmp_dir) / "prepared"
             cfg = DictDefault(
                 {
+                    "tokenizer_config": "huggyllama/llama-7b",
                     "sequence_len": 256,
                     "datasets": [
                         {