Lambda big run: Feb 28
AdityaNG committed Feb 27, 2024
1 parent 58dfa6b commit 09df6a8
Showing 3 changed files with 44 additions and 23 deletions.
10 changes: 5 additions & 5 deletions drivellava/constants.py
@@ -74,12 +74,12 @@ def get_json(encoded_video_path: str) -> str:
 DECODER_ONNX_PATH = os.path.join(COMMAVQ_GPT2M_DIR, "decoder.onnx")
 
 # assert os.path.isfile(ENCODER_ONNX_PATH)
-assert os.path.isfile(DECODER_ONNX_PATH), DECODER_ONNX_PATH
+# assert os.path.isfile(DECODER_ONNX_PATH), DECODER_ONNX_PATH
 
-assert len(ENCODED_VIDEOS) > 0
-assert len(VAL_ENCODED_VIDEOS) > 0
-assert len(ENCODED_POSE) > 0
-assert len(VAL_ENCODED_VIDEOS) > 0
+# assert len(ENCODED_VIDEOS) > 0
+# assert len(VAL_ENCODED_VIDEOS) > 0
+# assert len(ENCODED_POSE) > 0
+# assert len(VAL_ENCODED_VIDEOS) > 0
 
 # COMMA_LLAVA_SPARSE_JSON_DATASET = os.path.join(
 #     COMMAVQ_DIR, "comma_llava_sparse.json"
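The change above turns hard, import-time asserts into comments, presumably so that drivellava.constants can be imported on machines (such as the Lambda training box) where only part of the commavq dataset has been synced. A minimal sketch of a softer alternative, not part of this commit, that warns instead of aborting when an artifact is missing:

import os
import warnings

def soft_check(path: str) -> bool:
    # Warn rather than assert when a dataset artifact
    # (e.g. DECODER_ONNX_PATH above) is absent on the current machine.
    if not os.path.isfile(path):
        warnings.warn(f"missing dataset artifact: {path}")
        return False
    return True
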
4 changes: 2 additions & 2 deletions drivellava/scripts/eval.py
@@ -48,9 +48,9 @@ def main():

     NUM_FRAMES = 20 * 1
 
-    encoded_video_path = "/root/Datasets/commavq/data_0_to_2500/000e83c564317de4668c2cb372f89b91_6.npy" # noqa
+    encoded_video_path = "/root/Datasets/commavq/val/fe809f0fff5562cc4d2bdc073d242123_31.npy" # noqa
 
-    assert os.path.isfile(encoded_video_path), encoded_video_path
+    # assert os.path.isfile(encoded_video_path), encoded_video_path
 
     pose_path = encoded_video_path.replace("data_", "pose_data_").replace(
         "val", "pose_val"
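For the new validation clip, the pose_path derivation shown above works out as follows. The first replace is a no-op because the val path contains no lowercase "data_" segment, so only the "val" to "pose_val" substitution applies:

# Previous training clip: ".../data_0_to_2500/..." became ".../pose_data_0_to_2500/..."
p = "/root/Datasets/commavq/val/fe809f0fff5562cc4d2bdc073d242123_31.npy"
p = p.replace("data_", "pose_data_").replace("val", "pose_val")
# p == "/root/Datasets/commavq/pose_val/fe809f0fff5562cc4d2bdc073d242123_31.npy"
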
53 changes: 37 additions & 16 deletions drivellava/scripts/train.py
@@ -8,38 +8,57 @@
 import sys
 from typing import List
 
-from drivellava.constants import ENCODED_JSON, VAL_ENCODED_JSON
+from drivellava.constants import COMMAVQ_DIR
 
 
 def load_json_dataset(
     json_list: List[str],
 ):
     # from drivellava.sparse_llava_dataset import generate_sparse_dataset
 
     data = []
     for json_path in json_list:
         with open(json_path, "r", encoding="utf-8") as f:
             loaded = json.load(f)
+            for index in range(len(loaded)):
+                assert len(loaded[index]["conversations"][1]["value"]) == 1
+                # loaded[index][
+                #     "conversations"
+                # ][0]["value"] = generate_sparse_dataset()
             data.extend(loaded)
 
     return data
 
 
 def main():
-    train = load_json_dataset(ENCODED_JSON)
-    val = load_json_dataset(VAL_ENCODED_JSON)
-
-    train_json_path = os.path.abspath("checkpoints/train.json")
-    val_json_path = os.path.abspath("checkpoints/val.json")
-
-    # Save train to a temp file
-    with open(train_json_path, "w", encoding="utf-8") as f:
-        json_data = json.dumps(train, ensure_ascii=False, indent=4)
-        f.write(json_data)
-
-    with open(val_json_path, "w", encoding="utf-8") as f:
-        json_data = json.dumps(val, ensure_ascii=False, indent=4)
-        f.write(json_data)
+    # train = load_json_dataset(ENCODED_JSON)
+    # val = load_json_dataset(VAL_ENCODED_JSON)
+
+    # train_json_path = os.path.abspath("checkpoints/train.json")
+    # val_json_path = os.path.abspath("checkpoints/val.json")
+
+    # # Save train to a temp file
+    # with open(train_json_path, "w", encoding="utf-8") as f:
+    #     json_data = json.dumps(train, ensure_ascii=False, indent=4)
+    #     f.write(json_data)
+
+    # with open(val_json_path, "w", encoding="utf-8") as f:
+    #     json_data = json.dumps(val, ensure_ascii=False, indent=4)
+    #     f.write(json_data)
+
+    train_json_path = os.path.join(COMMAVQ_DIR, "train.json")
+    val_json_path = os.path.join(COMMAVQ_DIR, "val.json")
+
+    train = load_json_dataset(
+        [
+            train_json_path,
+        ]
+    )
+    val = load_json_dataset(
+        [
+            val_json_path,
+        ]
+    )
 
     print(f"Train: {len(train)}")
     print(f"Val: {len(val)}")
@@ -50,7 +69,9 @@ def main():
     DEEPSPEED_JSON = os.path.abspath("./config/zero3.json")
     MODEL_NAME = "liuhaotian/llava-v1.5-7b"
     DATA_PATH = train_json_path # Replace with your JSON data path
-    IMAGE_FOLDER = "/" # Replace with your image folder path
+    IMAGE_FOLDER = os.path.expanduser(
+        "~/Datasets/commavq"
+    ) # Replace with your image folder path
     VISION_TOWER = "openai/clip-vit-large-patch14-336"
     OUTPUT_DIR = os.path.expanduser("~/Datasets/checkpoints")
 
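The constants in the last hunk (DEEPSPEED_JSON, MODEL_NAME, DATA_PATH, IMAGE_FOLDER, VISION_TOWER, OUTPUT_DIR) are presumably assembled into a LLaVA fine-tuning invocation later in train.py, which this diff does not show. A rough sketch of the kind of command those values map onto, using flag names from the upstream LLaVA finetuning scripts; the actual command built by this script may differ:

import subprocess

# Sketch only; the variables are the ones defined above in main().
cmd = [
    "deepspeed", "llava/train/train_mem.py",
    "--deepspeed", DEEPSPEED_JSON,
    "--model_name_or_path", MODEL_NAME,
    "--data_path", DATA_PATH,
    "--image_folder", IMAGE_FOLDER,
    "--vision_tower", VISION_TOWER,
    "--output_dir", OUTPUT_DIR,
]
subprocess.run(cmd, check=True)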
