From f7b618d4a01c8654d75cb7d6cb8c669f11bfa61a Mon Sep 17 00:00:00 2001
From: Simon Fan <xmfan@fb.com>
Date: Fri, 6 Dec 2024 15:26:16 -0800
Subject: [PATCH] Remove leading "-" placeholder from user-initiated compile ids

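Drop the leading "-" placeholder from user-initiated compile ids in the
crate's own formatting: the "-_{frame_id}_{frame_compile_id}_{attempt}"
names built in src/lib.rs and src/parsers.rs lose their "-_" prefix, and
the Display impl in src/types.rs renders e.g. "[0/0]" instead of
"[-/0/0]". The test input logs are regenerated to match; their
compile_id fields now read "0/0" rather than "-/0/0", and timestamps,
PIDs, object addresses, and payload hashes change as a side effect of
regeneration.

A minimal, self-contained sketch of the Display-side change; the real
CompileId enum and its _format_dynamo_id helper live in src/types.rs and
may differ in detail (e.g. how a nonzero attempt is rendered):

    use std::fmt;

    struct DynamoCompileId {
        frame_id: u32,
        frame_compile_id: u32,
    }

    enum CompileId {
        UserInitiated(DynamoCompileId),
    }

    impl fmt::Display for CompileId {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            match self {
                // Before this patch the format string carried a "-/"
                // placeholder, so this rendered as "[-/0/0]".
                CompileId::UserInitiated(d) => {
                    write!(f, "[{}/{}]", d.frame_id, d.frame_compile_id)
                }
            }
        }
    }

    fn main() {
        let id = CompileId::UserInitiated(DynamoCompileId {
            frame_id: 0,
            frame_compile_id: 0,
        });
        assert_eq!(id.to_string(), "[0/0]");
    }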
---
 src/lib.rs                       |    2 +-
 src/parsers.rs                   |    2 +-
 src/types.rs                     |    2 +-
 tests/inputs/cache_hit_miss.log  | 1844 +++++++++++++++---------------
 tests/inputs/chromium_events.log |  114 +-
 tests/inputs/comp_failure.log    |   72 +-
 tests/inputs/comp_metrics.log    |  260 ++---
 tests/inputs/simple.log          |  468 ++++----
 tests/integration_test.rs        |   35 +-
 9 files changed, 1411 insertions(+), 1388 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index f85351b..3931507 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -380,7 +380,7 @@ pub fn parse_path(path: &PathBuf, config: ParseConfig) -> anyhow::Result<ParseOu
                 .as_ref()
                 .map_or(format!("unknown_{lineno}"), |c| match c {
                     CompileId::UserInitiated(d) => {
-                        format!("-_{}_{}_{}", d.frame_id, d.frame_compile_id, d.attempt)
+                        format!("{}_{}_{}", d.frame_id, d.frame_compile_id, d.attempt)
                     }
                     CompileId::CompiledAutogradInitiated {
                         compiled_autograd_id,
diff --git a/src/parsers.rs b/src/parsers.rs
index af80c10..2cfbf5e 100644
--- a/src/parsers.rs
+++ b/src/parsers.rs
@@ -58,7 +58,7 @@ fn simple_file_output(
         .as_ref()
         .map_or(format!("unknown_{lineno}"), |c| match c {
             CompileId::UserInitiated(d) => {
-                format!("-_{}_{}_{}", d.frame_id, d.frame_compile_id, d.attempt)
+                format!("{}_{}_{}", d.frame_id, d.frame_compile_id, d.attempt)
             }
             CompileId::CompiledAutogradInitiated {
                 compiled_autograd_id,
diff --git a/src/types.rs b/src/types.rs
index 3d5991f..879f4b2 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -169,7 +169,7 @@ impl fmt::Display for CompileId {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             CompileId::UserInitiated(d) => {
-                write!(f, "[-/{}]", _format_dynamo_id(d))
+                write!(f, "[{}]", _format_dynamo_id(d))
             }
             CompileId::CompiledAutogradInitiated {
                 compiled_autograd_id,
diff --git a/tests/inputs/cache_hit_miss.log b/tests/inputs/cache_hit_miss.log
index 89f5929..0cc288a 100644
--- a/tests/inputs/cache_hit_miss.log
+++ b/tests/inputs/cache_hit_miss.log
@@ -1,74 +1,74 @@
-V1205 21:43:53.677000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "81aedbafe2a4af3636e7d0711b0f6cb4"}
+V1206 15:24:45.403000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d89a3716f835eda9884b375881c8c021"}
 	{
 	"name": "dynamo",
-	"ts": 1733463833677668.2,
+	"ts": 1733527485402945.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:53.679000 1657128 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
-V1205 21:43:53.679000 1657128 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", 1]}
-V1205 21:43:53.679000 1657128 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_inductor/test_case.py", 2]}
-V1205 21:43:53.679000 1657128 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 3]}
-V1205 21:43:53.680000 1657128 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 4]}
-V1205 21:43:53.680000 1657128 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 5]}
-V1205 21:43:53.680000 1657128 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 6]}
-V1205 21:43:53.680000 1657128 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 7]}
-V1205 21:43:53.680000 1657128 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 8]}
-V1205 21:43:53.681000 1657128 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/contextlib.py", 9]}
-V1205 21:43:53.681000 1657128 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", 10]}
-V1205 21:43:53.681000 1657128 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 475, "name": "test_flex_attention_caching", "filename": 1}, {"line": 460, "name": "fn", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.681000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "668b27045ba47538bec0dd56a70a65b4"}
+V1206 15:24:45.403000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", 1]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_inductor/test_case.py", 2]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 3]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 4]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 5]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 6]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 7]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 8]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/contextlib.py", 9]}
+V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", 10]}
+V1206 15:24:45.405000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 475, "name": "test_flex_attention_caching", "filename": 1}, {"line": 460, "name": "fn", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.405000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e415c7d5114d113beb39e5b78fc4c96d"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463833681693.0,
+	"ts": 1733527485405153.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:53.687000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.688000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb10>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.688000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.885000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.885000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb70>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.886000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 4, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.887000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.887000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec90>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.887000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 5, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.888000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.889000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cd10>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.889000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 6, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.890000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.890000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ecf0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.891000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 7, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.892000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.892000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec30>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.892000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 8, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.893000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.894000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ed50>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.894000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 9, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.895000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.895000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ce30>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.895000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 10, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.896000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.897000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eff0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.897000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 11, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.898000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.898000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cdd0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.899000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 12, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.900000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.900000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96f050>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.900000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 13, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.910000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52fbb526c9a1f1575361cb8ed8e7794e"}
+V1206 15:24:45.409000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.409000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.409000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.519000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.520000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.520000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 4, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.520000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 5, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 6, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 7, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.523000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.523000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.523000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 8, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.524000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.524000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.524000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 9, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.525000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.525000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.525000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 10, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 11, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 12, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 13, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.534000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52fbb526c9a1f1575361cb8ed8e7794e"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_args_0_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_1_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_2_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_4_0_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_1_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_2_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_3_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_4_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_5_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_6_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_7_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	        l_args_0_ = L_args_0_
@@ -104,33 +104,33 @@ V1205 21:43:53.910000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
 	            return ge
 	            
-V1205 21:43:53.911000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ea2a47d675d8ca192b4ac868968779d4"}
+V1206 15:24:45.534000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ca2a774ae001e87e57bd31e3fb982793"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463833911640.2,
+	"ts": 1733527485534550.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:53.912000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4ec5ec83bc979a392797513416fd46c4"}
+V1206 15:24:45.534000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "dbd3ef3cad5d244dd2996256c488f716"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463833912126.8,
+	"ts": 1733527485534930.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:53.936000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1a438bd513497783ba024045727aa6c9"}
+V1206 15:24:45.547000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c168856da88c1195cba9758754bcd1a1"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -154,7 +154,7 @@ V1205 21:43:53.936000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['args'][3], accessed_by=TupleGetItemGuardAccessor(3)
 	| | | +- GuardManager: source=L['args'][3].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 139667217695600)     
+	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 140062268556144)     
 	| | +- GuardManager: source=L['args'][4], accessed_by=TupleGetItemGuardAccessor(4)
 	| | | +- TYPE_MATCH: ___check_type_id(L['args'][4], 8812224)                     
 	| | | +- LENGTH_CHECK: len(L['args'][4]) == 11                                     
@@ -196,7 +196,7 @@ V1205 21:43:53.936000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- EQUALS_MATCH: L['args'][4][9] == 128                                      
 	| | | +- GuardManager: source=L['args'][4][10], accessed_by=TupleGetItemGuardAccessor(10)
 	| | | | +- GuardManager: source=L['args'][4][10].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 139667217696224) 
+	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 140062268556768) 
 	| | +- GuardManager: source=L['args'][5], accessed_by=TupleGetItemGuardAccessor(5)
 	| | | +- EQUALS_MATCH: L['args'][5] == 0.125                                       
 	| | +- GuardManager: source=L['args'][6], accessed_by=TupleGetItemGuardAccessor(6)
@@ -213,20 +213,20 @@ V1205 21:43:53.936000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- DICT_LENGTH: not L['kwargs']                                             
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
 	| | +- GuardManager: source=G['flex_attention_hop'], accessed_by=DictGetItemGuardAccessor('flex_attention_hop')
-	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 97810240)         
+	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 96230624)         
 	| | | +- GuardManager: source=G['flex_attention_hop'].__name__, accessed_by=GetAttrGuardAccessor(__name__)
 	| | | | +- EQUALS_MATCH: G['flex_attention_hop'].__name__ == 'flex_attention'        
-	| | +- GuardManager: source=G['_139667213076496_c0'], accessed_by=DictGetItemGuardAccessor('_139667213076496_c0')
-	| | | +- GuardManager: source=G['_139667213076496_c0'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
-	| | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c0'].Tensor, 82291104)  
-	| | | | +- GuardManager: source=G['_139667213076496_c0'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
-	| | | | | +- GuardManager: source=G['_139667213076496_c0'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c0'].Tensor.__bases__[0], 139667196393888)
+	| | +- GuardManager: source=G['_140062263790704_c0'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c0')
+	| | | +- GuardManager: source=G['_140062263790704_c0'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
+	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c0'].Tensor, 82181376)  
+	| | | | +- GuardManager: source=G['_140062263790704_c0'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
+	| | | | | +- GuardManager: source=G['_140062263790704_c0'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c0'].Tensor.__bases__[0], 140062119703136)
 	| | +- GuardManager: source=G['__builtins_dict___0'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___0')
 	| | | +- GuardManager: source=G['__builtins_dict___0']['len'], accessed_by=DictGetItemGuardAccessor('len')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['len'], 139667218584480)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['len'], 140062269592480)
 	| | | +- GuardManager: source=G['__builtins_dict___0']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['sum'], 139667218585600)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['sum'], 140062269593600)
 	| | | +- GuardManager: source=G['__builtins_dict___0']['list'], accessed_by=DictGetItemGuardAccessor('list')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['list'], 8841312)  
 	| | | +- GuardManager: source=G['__builtins_dict___0']['type'], accessed_by=DictGetItemGuardAccessor('type')
@@ -236,16 +236,16 @@ V1205 21:43:53.936000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__builtins_dict___0']['object'], accessed_by=DictGetItemGuardAccessor('object')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['object'], 8810976)
 	| | | +- GuardManager: source=G['__builtins_dict___0']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['isinstance'], 139667218584160)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['isinstance'], 140062269592160)
 	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 139665266915776)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 87414528)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 139665266954544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87437744)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
 	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
@@ -261,44 +261,44 @@ V1205 21:43:53.936000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 139665266954272)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
 	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
-	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 431080
+	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
 	| | | | +- KeyValueManager pair at index=1
 	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 139665266871808)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 139665266552240)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 139665266608368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 139665266607920)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
 	
-V1205 21:43:53.936000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2cac46b866e92661cd25de5abc8596ab"}
+V1206 15:24:45.547000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9f584d7434070bbe2c589c1cc5c9fa1c"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463833936672.2,
+	"ts": 1733527485547627.0,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:53.941000 1657128 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 486, "dynamo_cumulative_compile_time_us": 254979, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:53.942000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8541d5e419b86b050711345af178a854"}
+V1206 15:24:45.550000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 379, "dynamo_cumulative_compile_time_us": 142473, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.550000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4808e1bc6347d31bf5778999c536de01"}
 	{
 	"name": "dynamo",
-	"ts": 1733463833942042.5,
+	"ts": 1733527485550622.8,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "_flex_attention_hop_wrapper",
 	"co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py",
@@ -319,73 +319,73 @@ V1205 21:43:53.942000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:55.774000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6d3812c5546dd7f7cba73388188aaea7"}
+V1206 15:24:45.976000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b144b519a5591ad277125a12c84b389b"}
 	{
 	"name": "dynamo",
-	"ts": 1733463835774035.8,
+	"ts": 1733527485976303.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:55.774000 1657128 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 475, "name": "test_flex_attention_caching", "filename": 1}, {"line": 459, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.775000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b6f010f589ae40d28fd50321ca390ea3"}
+V1206 15:24:45.976000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 475, "name": "test_flex_attention_caching", "filename": 1}, {"line": 459, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.977000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b2b1c355b023de0af140013ba14024ed"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463835775132.8,
+	"ts": 1733527485977092.0,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:55.777000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.778000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb10>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.778000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.789000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.790000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb70>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.790000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.791000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.792000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec90>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.792000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.811000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.811000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cd10>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:55.812000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.085000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.086000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ecf0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.086000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 7, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.087000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.088000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec30>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.088000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 8, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.089000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.089000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ed50>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.090000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 9, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.091000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.091000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ce30>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.092000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 10, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.093000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.093000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eff0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.093000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 11, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.095000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.095000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cdd0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.095000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 12, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.096000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.097000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96f050>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.097000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 13, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:43:56.105000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e6aa2ecb5d533ab181a6215ce16f359c"}
+V1206 15:24:45.979000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.979000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.979000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.985000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.986000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.986000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.987000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.987000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.987000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.998000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.998000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:45.998000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.071000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.071000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.071000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 7, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.072000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.072000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.072000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 8, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.073000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.073000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.073000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 9, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.074000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.074000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.074000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 10, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.075000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.075000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.075000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 11, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.076000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.076000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.076000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 12, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.077000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.077000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.077000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 13, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:46.082000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e6aa2ecb5d533ab181a6215ce16f359c"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_q_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_k_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_v_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_block_mask_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	        l_q_ = L_q_
@@ -420,56 +420,56 @@ V1205 21:43:56.105000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
 	            return ge
 	            
-V1205 21:43:56.105000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "06380d0939355062022641f561c59be1"}
+V1206 15:24:46.082000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "37b24b3f85c3e5d89e19d2ed089a024a"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463836105734.0,
+	"ts": 1733527486082618.8,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.106000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ffe49f114bd6731b812dace005de7124"}
+V1206 15:24:46.082000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1cb1b51c68c2aa91138743ed176863dc"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463836106181.2,
+	"ts": 1733527486082898.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.110000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d4a75fcbfd9b3bf9ea52ae9ec0e93437"}
+V1206 15:24:46.085000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fe96e7264be7dc8710649f24088a12a7"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463836110162.0,
+	"ts": 1733527486085403.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.114000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3700a256230321b45d538e33d7eb17f8"}
+V1206 15:24:46.087000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f3b0a502ac862b43ef8e578cbbbedaef"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463836114816.2,
+	"ts": 1733527486087844.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.214000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
+V1206 15:24:46.137000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
 	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
 	                                              mutates_data=False,
 	                                              mutates_metadata=False,
@@ -613,7 +613,7 @@ V1205 21:43:56.214000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compil
 	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
 	                    bw_donated_idxs=None,
 	                    num_backward_tokens=0)
-V1205 21:43:56.216000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
+V1206 15:24:46.138000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
@@ -636,80 +636,80 @@ V1205 21:43:56.216000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compil
 	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
 	            return ge
 	            
-V1205 21:43:56.217000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "868060a95e552ef9b5a2c2a2dc8546a0"}
+V1206 15:24:46.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "41345436bd100a92ac2e1c11dbcfde73"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463836217795.8,
+	"ts": 1733527486139665.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.218000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02a5d7c2626e8ceba903b7ac9fe4d16e"}
+V1206 15:24:46.140000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ff1f544bdf98bb91144ed9d00ccfd2c4"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463836218390.8,
+	"ts": 1733527486140025.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.417000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "005141e0fde31a203f01059fab99413a"}
+V1206 15:24:46.240000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b7986424633a93e63eaef105f6813984"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463836417672.8,
+	"ts": 1733527486240539.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.418000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2f16ddcb96ee3bcfecac2da903702fef"}
+V1206 15:24:46.241000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "71eb226a27256d687d98919bf6379b6e"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463836418220.8,
+	"ts": 1733527486240972.5,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.419000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7a6cdd4743afa40d8a7b536f91e0f12c"}
+V1206 15:24:46.241000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e3aa25638cfc1bbeacc1a3b1146ee001"}
 	{
 	"name": "inductor_codecache_torch_key",
-	"ts": 1733463836419796.0,
+	"ts": 1733527486241893.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:56.613000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8f59b9a59e0fc406ea2fe57d522adc50"}
+V1206 15:24:46.497000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2357634e283643a8bf96005629e2a966"}
 	{
 	"name": "inductor_codecache_torch_key",
-	"ts": 1733463836613761.5,
+	"ts": 1733527486497765.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.365000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7bb73344f8ffc5ea0484b87c812cde68"}
+V1206 15:24:46.953000 1667746 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2685e2fe8ae1c10315e3657921ebac7a"}
 	
 	import torch
 	from torch import tensor, device
@@ -726,10 +726,10 @@ V1205 21:43:58.365000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	torch._dynamo.config.accumulated_cache_size_limit = 256
 	torch._dynamo.config.traceable_tensor_subclasses = set()
 	torch._dynamo.config.suppress_errors = False
-	torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch._refs', 'torch.testing', 'torch._decomp', 'torch.distributions', 'torch._prims'}
+	torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch._decomp', 'torch.testing', 'torch._prims', 'torch._refs', 'torch.distributions'}
 	torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
 	torch._dynamo.config.raise_on_ctx_manager_usage = True
-	torch._dynamo.config._save_config_ignore = {'skipfiles_inline_module_allowlist', 'repro_level', 'constant_functions', 'repro_after'}
+	torch._dynamo.config._save_config_ignore = {'repro_after', 'skipfiles_inline_module_allowlist', 'repro_level', 'constant_functions'}
 	torch._dynamo.config.log_compilation_metrics = False
 	torch._dynamo.config.reorderable_logging_functions = set()
 	torch._dynamo.config._autograd_backward_strict_mode_banned_ops = ['stride', 'requires_grad', 'storage_offset', 'layout', 'data', 'is_coalesced', 'is_complex', 'is_conj', 'is_contiguous', 'is_cpu', 'is_cuda', 'is_distributed', 'is_floating_point', 'is_inference', 'is_ipu', 'is_leaf', 'is_maia', 'is_meta', 'is_mkldnn', 'is_mps', 'is_mtia', 'is_neg', 'is_nested', 'is_nonzero', 'is_pinned', 'is_quantized', 'is_same_size', 'is_set_to', 'is_shared', 'is_signed', 'is_sparse', 'is_sparse_csr', 'is_vulkan', 'is_xla', 'is_xpu']
@@ -765,20 +765,20 @@ V1205 21:43:58.365000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	
 	
 	
-	# torch version: 2.6.0a0+git5f4afda
-	# torch cuda version: 12.1
-	# torch git version: 5f4afda82a5a7a708effa35379140b88511b1f5f
+	# torch version: 2.6.0a0+giteece9ec
+	# torch cuda version: 12.2
+	# torch git version: eece9ecd62cae84bc2f915fc48cffe43e30256aa
 	
 	
 	# CUDA Info: 
 	# nvcc: NVIDIA (R) Cuda compiler driver 
 	# Copyright (c) 2005-2023 NVIDIA Corporation 
-	# Built on Mon_Apr__3_17:16:06_PDT_2023 
-	# Cuda compilation tools, release 12.1, V12.1.105 
-	# Build cuda_12.1.r12.1/compiler.32688072_0 
+	# Built on Tue_Aug_15_22:02:13_PDT_2023 
+	# Cuda compilation tools, release 12.2, V12.2.140 
+	# Build cuda_12.2.r12.2/compiler.33191640_0 
 	
 	# GPU Hardware Info: 
-	# NVIDIA PG509-210 : 8 
+	# NVIDIA H100 : 8 
 	
 	
 	from torch.nn import *
@@ -829,31 +829,31 @@ V1205 21:43:58.365000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	        # To run it separately, do 
 	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
 	        # mod(*args)
-V1205 21:43:58.372000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "669554dac6e661fe086afdef8c1679d3"}
+V1206 15:24:46.957000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "59eead8c2c99ba4c19b6f2e1672cfb1c"}
 	{
 	"name": "_recursive_post_grad_passes",
-	"ts": 1733463838372004.0,
+	"ts": 1733527486957768.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.388000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "97550858845d14af037067ab2e1080e2"}
+V1206 15:24:46.966000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "65cd5e088aa4752db9445bb36e57e3aa"}
 	{
 	"name": "_recursive_post_grad_passes",
-	"ts": 1733463838388180.5,
+	"ts": 1733527486966834.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.390000 1657128 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
+V1206 15:24:46.968000 1667746 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
@@ -876,176 +876,176 @@ V1205 21:43:58.390000 1657128 torch/_inductor/compile_fx.py:898] {"inductor_post
 	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
 	            return ge
 	            
-V1205 21:43:58.396000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b4ed9030ed413d784bdfcebd78c34f0d"}
+V1206 15:24:46.972000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "48d0779166f135a77ff5cde83cf71d3e"}
 	{
 	"name": "GraphLowering.run",
-	"ts": 1733463838396350.8,
+	"ts": 1733527486972460.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.675000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2d941777c1a6b8999302927941869f47"}
+V1206 15:24:47.138000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6627ba3ae1644229670ab4d3fb0d4169"}
 	{
 	"name": "GraphLowering.run",
-	"ts": 1733463838675652.8,
+	"ts": 1733527487138160.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.676000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "08cdbf10a169c181fff5f06cd880d4ac"}
+V1206 15:24:47.138000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1cd377482ee4a0ca5764fecedf20d954"}
 	{
 	"name": "GraphLowering.compile_to_fn",
-	"ts": 1733463838676420.0,
+	"ts": 1733527487138712.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.677000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "debf50bbf3a7dc7c07c87530053ba60e"}
+V1206 15:24:47.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f4590e34449b7bb09aed2c93c625760d"}
 	{
 	"name": "code_gen",
-	"ts": 1733463838677128.8,
+	"ts": 1733527487139024.8,
 	"args": {
 	"fn_name": "GraphLowering.compile_to_module",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.677000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "51d0b2b54aec1fbece8fdcc94be0bfba"}
+V1206 15:24:47.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7b65eb21df5478dbf8fba3b59850be45"}
 	{
 	"name": "GraphLowering.codegen",
-	"ts": 1733463838677568.5,
+	"ts": 1733527487139268.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.679000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b41d6f118fa6f485c18faad5cb5a8ab0"}
+V1206 15:24:47.140000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9f9bd4fb5f640d8a4af811e82da8f2b7"}
 	{
 	"name": "Scheduler.__init__",
-	"ts": 1733463838679907.2,
+	"ts": 1733527487140643.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.685000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7e64fbce28339576548d9fefa770e0bf"}
+V1206 15:24:47.144000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "36fb5520981a68e9cd0a7455ec7e42d1"}
 	{
 	"name": "Scheduler.fused_nodes",
-	"ts": 1733463838685574.0,
+	"ts": 1733527487144035.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.686000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8302feaa8fd28cdb12667320059d5545"}
+V1206 15:24:47.144000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ff4c0585a764bffbe15620787ef7984e"}
 	{
 	"name": "Scheduler.fused_nodes",
-	"ts": 1733463838686141.5,
+	"ts": 1733527487144372.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.690000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f01ec8bcb4e9d0bb5e2c0a8e37175876"}
+V1206 15:24:47.146000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b2127891028545727d5538178e766dfd"}
 	{
 	"name": "Scheduler.__init__",
-	"ts": 1733463838690764.0,
+	"ts": 1733527487146903.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.691000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4d2c218ad1f8f544c6908dd6175acffd"}
+V1206 15:24:47.147000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7d89af7c7d58849f171d9e78f71d86a2"}
 	{
 	"name": "Scheduler.codegen",
-	"ts": 1733463838691153.8,
+	"ts": 1733527487147125.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.701000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6c696c5380c23229a6a87875883681db"}
+V1206 15:24:47.153000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3b4150db66839c4586992ffee613eae4"}
 	{
 	"name": "Scheduler.codegen",
-	"ts": 1733463838700969.8,
+	"ts": 1733527487153011.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.701000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "577e41d5ea21db99df2f753d9f499f7c"}
+V1206 15:24:47.153000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bf81162402f2ae6984f652a3f44baa84"}
 	{
 	"name": "PythonWrapperCodegen.generate",
-	"ts": 1733463838701334.5,
+	"ts": 1733527487153239.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.703000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "50d2eedf8843ada257f032a7e0a59447"}
+V1206 15:24:47.154000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dcff0d1fc8b779e008cb24ec2e74e7f7"}
 	{
 	"name": "PythonWrapperCodegen.generate",
-	"ts": 1733463838703719.0,
+	"ts": 1733527487154470.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.704000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8e9a1f56f6981796b08741e5df2c6360"}
+V1206 15:24:47.154000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9be98be603274f80673ac2f6d241cec4"}
 	{
 	"name": "GraphLowering.codegen",
-	"ts": 1733463838704137.8,
+	"ts": 1733527487154692.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.705000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmph3za5c24/vs/cvs4vumksf6bwll3igphroshjevbea7jp53cxywxrhliugsk43fm.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0aa5b4706bd4357e344ccb8187abb38c"}
+V1206 15:24:47.155000 1667746 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmpsn77tlsm/sm/csmb7f23sz2oa7mngdcoprpgrrbziqzirwhkjffdmxv6qhdf3e57.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9ced76d2b85dd84790793e85fcadcc28"}
 	# AOT ID: ['0_inference']
 	from ctypes import c_void_p, c_long, c_int
 	import torch
@@ -1087,7 +1087,7 @@ V1205 21:43:58.705000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_c
 	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
 	
 	
-	# kernel path: /tmp/tmph3za5c24/wq/cwqhrcmg46q7si24xk2wvtwdqmnsixsarf7zy5e6poex5bl4gdlu.py
+	# kernel path: /tmp/tmpsn77tlsm/xo/cxokkamvqulf4elvqs25iez7wbmqqfbp3i4qy7g2hwvher2zrziz.py
 	# Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
 	# Source node to ATen node mapping:
 	#   flex_attention => flex_attention
@@ -1105,8 +1105,8 @@ V1205 21:43:58.705000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_c
 	@triton_heuristics.template(
 	    num_stages=3,
 	    num_warps=4,
-	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=108, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
-	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '59CDC28C5AC44AE92A1C88C87D935A8E2ADCDA7AF532B6269756431229691604', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
+	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
 	)
 	@triton.jit
 	def triton_tem_fused_0(arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0):
@@ -1603,109 +1603,109 @@ V1205 21:43:58.705000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_c
 	    from torch._inductor.wrapper_benchmark import compiled_module_main
 	    compiled_module_main('None', benchmark_compiled_module)
 	
-V1205 21:43:58.705000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "73158bcb4abe2d3bf2377c0df65afa30"}
+V1206 15:24:47.155000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "495b4d06507b7298850f179495a17b02"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463838705738.8,
+	"ts": 1733527487155553.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:43:58.737000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e779c587976847f0970359b8b439ae8a"}
+V1206 15:24:47.173000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6dee17cfbaa0ca0649062deba0d7d459"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463838737494.5,
+	"ts": 1733527487173738.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.045000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5f4772c10114158bbbcc75d7c22fe3c9"}
+V1206 15:24:50.157000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2d51e2d3dcba45c4d0d4d29f299f024a"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463844045788.8,
+	"ts": 1733527490157741.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.046000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7abb29aa1748a3babaf48ec4274dc03b"}
+V1206 15:24:50.158000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0b68c334c8533667a781e2f662968ca9"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463844046411.0,
+	"ts": 1733527490158210.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.046000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "29c3133f457abfd5213c0d166d78836f"}
+V1206 15:24:50.158000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "60e7a41770b95fe1d25ce4274e02d3ef"}
 	{
 	"name": "code_gen",
-	"ts": 1733463844046895.5,
+	"ts": 1733527490158517.5,
 	"args": {
 	"fn_name": "GraphLowering.compile_to_module",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.047000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0cbfab0d0b3560ce13a0768c35986bb3"}
+V1206 15:24:50.158000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02264d8aae3ebc56684debf0c3f9f4de"}
 	{
 	"name": "GraphLowering.compile_to_fn",
-	"ts": 1733463844047406.8,
+	"ts": 1733527490158861.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.084000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fc5c063fc0360dd90ed6fe8bac348ac6"}
+V1206 15:24:50.251000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a469a724b0141b56eff140f8388e6f2b"}
 	{
 	"name": "TritonBundler.collect",
-	"ts": 1733463844084747.8,
+	"ts": 1733527490250938.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.087000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "45778a5bf8ac18a2973d6b91e20be752"}
+V1206 15:24:50.252000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b19aaa17a3d9b352d63772e3deb35b7e"}
 	{
 	"name": "TritonBundler.collect",
-	"ts": 1733463844087137.8,
+	"ts": 1733527490252606.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.100000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "18cedfe2b336d0df99bbe972776f4771"}
+V1206 15:24:50.256000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c0c67812e1ca34090ce7aea9849e792e"}
 	{
 	"name": "fx_graph_cache_miss",
-	"ts": 1733463836419651.8,
+	"ts": 1733527486241755.2,
 	"args": {
-	"key": "f3gi4pndnobstdl23se5doxewpvvcxdsjxmjk6hyx2eeiggheylt",
+	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
 	"components": [
 	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -1741,10 +1741,10 @@ V1205 21:44:04.100000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
@@ -2000,11 +2000,11 @@ V1205 21:44:04.100000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
 	],
-	"cache_event_time": 1733463838360684507,
+	"cache_event_time": 1733527486949451176,
 	"cache_state": "miss",
 	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])",
-	"time_taken_ns": 7665057603,
-	"compile_id": "-/1/0"
+	"time_taken_ns": 4009141776,
+	"compile_id": "1/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -2012,19 +2012,19 @@ V1205 21:44:04.100000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:44:04.101000 1657128 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "348e50ac91425d0a0ca62741c4d1d35f"}
-	{"key": "f3gi4pndnobstdl23se5doxewpvvcxdsjxmjk6hyx2eeiggheylt", "components": ["[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] 
inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>", "[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}", "[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}", "[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", 
"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] 
inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] 
inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733463838360684507, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])", "time_taken_ns": 7665057603, "compile_id": "-/1/0"}
-V1205 21:44:04.102000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ec5fa14c2c3d6ccce81e41010d942ff0"}
+V1206 15:24:50.257000 1667746 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0ad616f7fe717a86ed9e39486f7955e2"}
+	{"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau", "components": ["[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] 
inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", 
"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] 
inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] 
inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527486949451176, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])", "time_taken_ns": 4009141776, "compile_id": "1/0"}
+V1206 15:24:50.257000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c7114cd61939a8286e15f75f4b1dcb23"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463844101886.0,
+	"ts": 1733527490257456.0,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"is_backward": false,
 	"cache_state": "miss",
-	"cache_event_time": 1733463836419651726,
-	"key": "f3gi4pndnobstdl23se5doxewpvvcxdsjxmjk6hyx2eeiggheylt",
+	"cache_event_time": 1733527486241755330,
+	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
 	"components": [
 	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -2060,10 +2060,10 @@ V1205 21:44:04.102000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
@@ -2328,41 +2328,41 @@ V1205 21:44:04.102000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.103000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c3716301b887f801b5c6f14a60decb0a"}
+V1206 15:24:50.258000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "df504ff9f036fd058109eb7eba60947f"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463844102974.2,
+	"ts": 1733527490258109.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.106000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d462dc8962a92e917e711a0a690b8939"}
+V1206 15:24:50.259000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8774d579b741586b86c16bfd883c37fc"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463844106400.8,
+	"ts": 1733527490259870.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.106000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "641549ff7f07f295d51b936b42b312ca"}
+V1206 15:24:50.260000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dee620e7977ddcc771eb4099546c78e8"}
 	{
 	"name": "autograd_cache_bypass",
-	"ts": 1733463836114794.5,
+	"ts": 1733527486087831.0,
 	"args": {
 	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
 	"cache_bypass_hard_exception": false,
 	"key": null,
 	"cache_state": "bypass",
 	"components": [],
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -2370,19 +2370,19 @@ V1205 21:44:04.106000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:44:04.107000 1657128 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b55f3a3a16b369b568f062c1351f1185"}
-	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "-/1/0"}
-V1205 21:44:04.107000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c8c9139b641268533263576d3f428ea0"}
+V1206 15:24:50.260000 1667746 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5601d02186053adcc1ba29fd248c1d20"}
+	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "1/0"}
+V1206 15:24:50.260000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1b940c0ffee7b576f294d159fb3b7a84"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844107456.5,
+	"ts": 1733527490260502.8,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"requires_subclass_dispatch": false,
 	"dispatch_mode": "inference",
 	"cache_state": "bypass",
-	"cache_event_time": 1733463836114794466,
+	"cache_event_time": 1733527486087830904,
 	"key": null,
 	"components": [],
 	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
@@ -2394,7 +2394,7 @@ V1205 21:44:04.107000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9bd4fb3172b99655644063e91be4e780"}
+V1206 15:24:50.275000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "004a7ba640839d40992c1546206d56ff"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -2402,21 +2402,21 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| +- GLOBAL_STATE: ___check_global_state()
 	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
 	| +- GuardManager: source=L['k'], accessed_by=DictGetItemGuardAccessor('k')
-	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['k'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['k'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['k'], L['q'], L['v'], L['block_mask'].q_indices, L['block_mask'].kv_indices, L['block_mask'].q_num_blocks, L['block_mask'].kv_num_blocks, L['block_mask'].full_q_indices, L['block_mask'].full_kv_indices, L['block_mask'].full_q_num_blocks, L['block_mask'].full_kv_num_blocks)
 	| | +- GuardManager: source=L['k'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['k'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['q'], accessed_by=DictGetItemGuardAccessor('q')
-	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['q'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['q'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['q'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['q'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['v'], accessed_by=DictGetItemGuardAccessor('v')
-	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['v'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['v'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING
@@ -2424,12 +2424,12 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- GuardManager: source=L['v'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['score_mod'], accessed_by=DictGetItemGuardAccessor('score_mod')
 	| | +- GuardManager: source=L['score_mod'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | +- ID_MATCH: ___check_obj_id(L['score_mod'].__code__, 139667217695600)   
+	| | | +- ID_MATCH: ___check_obj_id(L['score_mod'].__code__, 140062268556144)   
 	| +- GuardManager: source=L['block_mask'], accessed_by=DictGetItemGuardAccessor('block_mask')
-	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 396207408)                
+	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 139679664)                
 	| | +- GuardManager: source=L['block_mask'].mask_mod, accessed_by=GetAttrGuardAccessor(mask_mod)
 	| | | +- GuardManager: source=L['block_mask'].mask_mod.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 139667217696224)
+	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 140062268556768)
 	| | +- GuardManager: source=L['block_mask'].q_indices, accessed_by=GetAttrGuardAccessor(q_indices)
 	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
 	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_indices, '_dynamo_dynamic_indices') == False
@@ -2475,7 +2475,7 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].as_tuple.__defaults__[0], 8908032)
 	| +- GuardManager: source=L['flex_attention'], accessed_by=DictGetItemGuardAccessor('flex_attention')
 	| | +- GuardManager: source=L['flex_attention'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 397452288)    
+	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 139318784)    
 	| | +- GuardManager: source=L['flex_attention'], accessed_by=FuncDefaultsGuardAccessor
 	| | | +- GuardManager: source=L['flex_attention'].__defaults__[2], accessed_by=GetItemGuardAccessor(2)
 	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[2], 8822752)
@@ -2486,17 +2486,17 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=L['flex_attention'].__defaults__[5], accessed_by=GetItemGuardAccessor(5)
 	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[5], 8822752)
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
-	| | +- GuardManager: source=G['_139667213076496_c1'], accessed_by=DictGetItemGuardAccessor('_139667213076496_c1')
-	| | | +- GuardManager: source=G['_139667213076496_c1'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
-	| | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c1'].Tensor, 82291104)  
-	| | | | +- GuardManager: source=G['_139667213076496_c1'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
-	| | | | | +- GuardManager: source=G['_139667213076496_c1'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c1'].Tensor.__bases__[0], 139667196393888)
+	| | +- GuardManager: source=G['_140062263790704_c1'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c1')
+	| | | +- GuardManager: source=G['_140062263790704_c1'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
+	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c1'].Tensor, 82181376)  
+	| | | | +- GuardManager: source=G['_140062263790704_c1'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
+	| | | | | +- GuardManager: source=G['_140062263790704_c1'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c1'].Tensor.__bases__[0], 140062119703136)
 	| | +- GuardManager: source=G['__builtins_dict___2'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___2')
 	| | | +- GuardManager: source=G['__builtins_dict___2']['len'], accessed_by=DictGetItemGuardAccessor('len')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['len'], 139667218584480)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['len'], 140062269592480)
 	| | | +- GuardManager: source=G['__builtins_dict___2']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['sum'], 139667218585600)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['sum'], 140062269593600)
 	| | | +- GuardManager: source=G['__builtins_dict___2']['list'], accessed_by=DictGetItemGuardAccessor('list')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['list'], 8841312)  
 	| | | +- GuardManager: source=G['__builtins_dict___2']['type'], accessed_by=DictGetItemGuardAccessor('type')
@@ -2506,16 +2506,16 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__builtins_dict___2']['object'], accessed_by=DictGetItemGuardAccessor('object')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['object'], 8810976)
 	| | | +- GuardManager: source=G['__builtins_dict___2']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['isinstance'], 139667218584160)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['isinstance'], 140062269592160)
 	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 139665266915776)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 87414528)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 139665266954544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87437744)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
 	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
@@ -2531,31 +2531,31 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 139665266954272)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
 	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
-	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 431080
+	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
 	| | | | +- KeyValueManager pair at index=1
 	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 139665266871808)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 139665266552240)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 139665266608368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 139665266607920)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
 	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_comptime'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_comptime')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 139664219014784)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 140057359527872)
 	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_decorators')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 139664218897952)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 140057359526192)
 	| | | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 139664343945664)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 140057422814624)
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot__utils')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 139663431923360)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 140048551571072)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, accessed_by=GetAttrGuardAccessor(_SUPPORTED_HEAD_DIMS)
 	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, 8841312)
 	| | | | +- LENGTH_CHECK: len(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS) == 10
@@ -2572,67 +2572,67 @@ V1205 21:44:04.138000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5], accessed_by=ListGetItemGuardAccessor(5)
 	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5] == 64
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot_flex_attention')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 139663431922960)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 140048551568912)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, accessed_by=GetAttrGuardAccessor(math)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 139667216591312)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 140062267404384)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 139667216595152)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 140062267408144)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, accessed_by=GetAttrGuardAccessor(torch)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 139667213076496)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 140062263790704)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 139667211310544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 140062262057760)
 	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static, accessed_by=GetAttrGuardAccessor(mark_static)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 107696464)
+	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 125680000)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 139664804054656)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 140057571877776)
 	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, accessed_by=GetAttrGuardAccessor(is_dynamo_compiling)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 139664804198208)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 140057572232544)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, accessed_by=GetAttrGuardAccessor(is_grad_enabled)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 139667201822368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 140062253115936)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device, accessed_by=GetAttrGuardAccessor(_validate_device)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 139663431699312)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 140048552018544)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, accessed_by=GetAttrGuardAccessor(flex_attention_hop)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 97810240)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 96230624)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__, accessed_by=GetAttrGuardAccessor(__name__)
 	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__ == 'flex_attention'
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim, accessed_by=GetAttrGuardAccessor(_supported_head_dim)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 139663431174224)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 140048552864992)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim, accessed_by=GetAttrGuardAccessor(_validate_embed_dim)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 395883648)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 139781872)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness, accessed_by=GetAttrGuardAccessor(_validate_nestedness)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 139663466500208)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 140048553100560)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input, accessed_by=GetAttrGuardAccessor(_validate_sdpa_input)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 76825296)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 139823744)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options, accessed_by=GetAttrGuardAccessor(_apply_kernel_options)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 139663477636016)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 140048554555888)
 	
-V1205 21:44:04.138000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dd25ebafeac0553359627c541a6a55df"}
+V1206 15:24:50.275000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f0dcf66ffff430aab1506701a2358ee4"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844138777.2,
+	"ts": 1733527490275762.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.143000 1657128 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 3980, "joint_graph_pass_time_us": 199281, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 16176, "inductor_code_gen_cumulative_compile_time_us": 5369766, "inductor_cumulative_compile_time_us": 7683665, "aot_autograd_cumulative_compile_time_us": 8001722, "dynamo_cumulative_compile_time_us": 8363644, "frame_key": "2", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 459, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.143000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a79890caccf0dce18cafdfb78702e235"}
+V1206 15:24:50.278000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 2504, "joint_graph_pass_time_us": 100514, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 9066, "inductor_code_gen_cumulative_compile_time_us": 3019492, "inductor_cumulative_compile_time_us": 4016483, "aot_autograd_cumulative_compile_time_us": 4177883, "dynamo_cumulative_compile_time_us": 4298670, "frame_key": "2", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 459, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.278000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e38d71b3f183088f1bc592d1f308f959"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844143642.2,
+	"ts": 1733527490278673.0,
 	"args": {
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"frame_key": "2",
 	"co_name": "fn",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py",
@@ -2653,73 +2653,73 @@ V1205 21:44:04.143000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.148000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "28cfa3b256620d8d25d9fde945a3ae56"}
+V1206 15:24:50.282000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "394328a4ef4c4a7736913bac1b8ace11"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844148328.5,
+	"ts": 1733527490282024.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.149000 1657128 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 483, "name": "test_flex_attention_caching", "filename": 1}, {"line": 460, "name": "fn", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.149000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "af824067f0fb4f07de29c1a7d6356b02"}
+V1206 15:24:50.282000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 483, "name": "test_flex_attention_caching", "filename": 1}, {"line": 460, "name": "fn", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.282000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "db937f3641ee59365a7de351869e46cf"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844149331.2,
+	"ts": 1733527490282590.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.151000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.152000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb10>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.152000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.267000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.268000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb70>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.268000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 4, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.269000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.269000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec90>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.269000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 5, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.271000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.271000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cd10>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.271000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 6, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.272000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.273000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ecf0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.273000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 7, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.274000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.274000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec30>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.274000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 8, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.275000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.276000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ed50>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.276000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 9, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.277000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.277000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ce30>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.277000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 10, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.278000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.279000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eff0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.279000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 11, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.280000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.280000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cdd0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.280000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 12, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.281000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.282000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96f050>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.282000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 13, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.290000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52fbb526c9a1f1575361cb8ed8e7794e"}
+V1206 15:24:50.284000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.284000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.284000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.346000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.346000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 4, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 5, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.348000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.348000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.348000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 6, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.349000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.349000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.349000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 7, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 8, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 9, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 10, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.352000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.352000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.352000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 11, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 12, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.354000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.354000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 13, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.358000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52fbb526c9a1f1575361cb8ed8e7794e"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_args_0_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_1_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_2_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_4_0_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_1_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_2_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_3_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_4_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_5_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_6_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_7_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	        l_args_0_ = L_args_0_
@@ -2755,33 +2755,33 @@ V1205 21:44:04.290000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
 	            return ge
 	            
-V1205 21:44:04.290000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "fa45747505103bb800e6cda5a8df016b"}
+V1206 15:24:50.358000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7ea1fa27ae5b4f6ebbb5c849ee69a9a2"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844290774.5,
+	"ts": 1733527490358949.8,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.291000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "965db87042b0fcf231e7dfbb79fafe26"}
+V1206 15:24:50.359000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7f0ded16da08aebef0f53116ced241f9"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844291225.0,
+	"ts": 1733527490359226.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.313000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9eee57bcc5c293120dd2fb102d61ea80"}
+V1206 15:24:50.370000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "217903d8e83efab45bc8d1821a378153"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -2805,7 +2805,7 @@ V1205 21:44:04.313000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['args'][3], accessed_by=TupleGetItemGuardAccessor(3)
 	| | | +- GuardManager: source=L['args'][3].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 139667217695600)     
+	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 140062268556144)     
 	| | +- GuardManager: source=L['args'][4], accessed_by=TupleGetItemGuardAccessor(4)
 	| | | +- TYPE_MATCH: ___check_type_id(L['args'][4], 8812224)                     
 	| | | +- LENGTH_CHECK: len(L['args'][4]) == 11                                     
@@ -2847,7 +2847,7 @@ V1205 21:44:04.313000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- EQUALS_MATCH: L['args'][4][9] == 128                                      
 	| | | +- GuardManager: source=L['args'][4][10], accessed_by=TupleGetItemGuardAccessor(10)
 	| | | | +- GuardManager: source=L['args'][4][10].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 139667217696224) 
+	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 140062268556768) 
 	| | +- GuardManager: source=L['args'][5], accessed_by=TupleGetItemGuardAccessor(5)
 	| | | +- EQUALS_MATCH: L['args'][5] == 0.125                                       
 	| | +- GuardManager: source=L['args'][6], accessed_by=TupleGetItemGuardAccessor(6)
@@ -2864,20 +2864,20 @@ V1205 21:44:04.313000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- DICT_LENGTH: not L['kwargs']                                             
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
 	| | +- GuardManager: source=G['flex_attention_hop'], accessed_by=DictGetItemGuardAccessor('flex_attention_hop')
-	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 97810240)         
+	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 96230624)         
 	| | | +- GuardManager: source=G['flex_attention_hop'].__name__, accessed_by=GetAttrGuardAccessor(__name__)
 	| | | | +- EQUALS_MATCH: G['flex_attention_hop'].__name__ == 'flex_attention'        
-	| | +- GuardManager: source=G['_139667213076496_c2'], accessed_by=DictGetItemGuardAccessor('_139667213076496_c2')
-	| | | +- GuardManager: source=G['_139667213076496_c2'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
-	| | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c2'].Tensor, 82291104)  
-	| | | | +- GuardManager: source=G['_139667213076496_c2'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
-	| | | | | +- GuardManager: source=G['_139667213076496_c2'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c2'].Tensor.__bases__[0], 139667196393888)
+	| | +- GuardManager: source=G['_140062263790704_c2'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c2')
+	| | | +- GuardManager: source=G['_140062263790704_c2'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
+	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c2'].Tensor, 82181376)  
+	| | | | +- GuardManager: source=G['_140062263790704_c2'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
+	| | | | | +- GuardManager: source=G['_140062263790704_c2'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c2'].Tensor.__bases__[0], 140062119703136)
 	| | +- GuardManager: source=G['__builtins_dict___4'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___4')
 	| | | +- GuardManager: source=G['__builtins_dict___4']['len'], accessed_by=DictGetItemGuardAccessor('len')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['len'], 139667218584480)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['len'], 140062269592480)
 	| | | +- GuardManager: source=G['__builtins_dict___4']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['sum'], 139667218585600)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['sum'], 140062269593600)
 	| | | +- GuardManager: source=G['__builtins_dict___4']['list'], accessed_by=DictGetItemGuardAccessor('list')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['list'], 8841312)  
 	| | | +- GuardManager: source=G['__builtins_dict___4']['type'], accessed_by=DictGetItemGuardAccessor('type')
@@ -2887,16 +2887,16 @@ V1205 21:44:04.313000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__builtins_dict___4']['object'], accessed_by=DictGetItemGuardAccessor('object')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['object'], 8810976)
 	| | | +- GuardManager: source=G['__builtins_dict___4']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['isinstance'], 139667218584160)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['isinstance'], 140062269592160)
 	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 139665266915776)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 87414528)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 139665266954544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87437744)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
 	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
@@ -2912,44 +2912,44 @@ V1205 21:44:04.313000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 139665266954272)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
 	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
-	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 431080
+	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
 	| | | | +- KeyValueManager pair at index=1
 	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 139665266871808)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 139665266552240)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 139665266608368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 139665266607920)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
 	
-V1205 21:44:04.314000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "16cb162cfbd7f49a6a500d7624204cb5"}
+V1206 15:24:50.370000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5d9cf4ebb554cb0afa821efb370971a5"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844314136.5,
+	"ts": 1733527490370919.8,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.318000 1657128 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 450, "dynamo_cumulative_compile_time_us": 164805, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": 
false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.318000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "aa07e9284aac69b58aaec746f4c5e4cb"}
+V1206 15:24:50.373000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 276, "dynamo_cumulative_compile_time_us": 88329, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": 
false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.373000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0e7b5fdc5a2525dc764f709284ebcfb6"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844318854.0,
+	"ts": 1733527490373678.8,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "_flex_attention_hop_wrapper",
 	"co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py",
@@ -2970,73 +2970,73 @@ V1205 21:44:04.318000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.322000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7fe4510c51abef8f9a708c12edee87fa"}
+V1206 15:24:50.375000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1f4a76ca6b5fcae61cfcc92045397262"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844322103.0,
+	"ts": 1733527490375674.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.322000 1657128 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 483, "name": "test_flex_attention_caching", "filename": 1}, {"line": 459, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.323000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8280ce3c0ecc94f92371e4e51235ca56"}
+V1206 15:24:50.376000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 483, "name": "test_flex_attention_caching", "filename": 1}, {"line": 459, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.376000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8d1bd9bb5e587d5f1fa509e316288846"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844323012.0,
+	"ts": 1733527490376205.0,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.325000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.325000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb10>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.325000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.335000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.336000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb70>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.336000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.337000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.338000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec90>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.338000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.348000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.349000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cd10>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.349000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.481000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.482000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ecf0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.482000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 7, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.483000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.484000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec30>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.484000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 8, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.485000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.486000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ed50>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.486000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 9, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.487000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.488000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ce30>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.488000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 10, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.489000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.490000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eff0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.490000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 11, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.491000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.492000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cdd0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.492000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 12, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.493000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.493000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96f050>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.494000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 13, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.502000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e6aa2ecb5d533ab181a6215ce16f359c"}
+V1206 15:24:50.377000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.377000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.377000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.383000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.384000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.384000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.384000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.385000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.385000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.390000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.390000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.390000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.457000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.457000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.458000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 7, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.458000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.459000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.459000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 8, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.459000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.460000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.460000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 9, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.460000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.461000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.461000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 10, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.461000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.462000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.462000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 11, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.462000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.463000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.463000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 12, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.463000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.464000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.464000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 13, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.468000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e6aa2ecb5d533ab181a6215ce16f359c"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_q_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_k_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_v_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_block_mask_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	        l_q_ = L_q_
@@ -3071,56 +3071,56 @@ V1205 21:44:04.502000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
 	            return ge
 	            
-V1205 21:44:04.502000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c889f77130e9603d49b3d865ce826737"}
+V1206 15:24:50.468000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "80655624a23088893ab4a2c409ac95a3"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844502768.0,
+	"ts": 1733527490468948.0,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.503000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "be05ad3ae97bafb9bb41264c04aa07dc"}
+V1206 15:24:50.469000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a719c6c37340b85f41f898361ba8c722"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463844503209.2,
+	"ts": 1733527490469220.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.504000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "daf9760c96e024ab043cb9c176316856"}
+V1206 15:24:50.469000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a15be333d609301edbc3c4ba97a6e27a"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463844504144.8,
+	"ts": 1733527490469741.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.508000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5641340ed77b1cb52b7123e888df97ea"}
+V1206 15:24:50.471000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c4c11179a8b2ec9ad87664f12d76dd56"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463844508593.8,
+	"ts": 1733527490471944.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.608000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
+V1206 15:24:50.519000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
 	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
 	                                              mutates_data=False,
 	                                              mutates_metadata=False,
@@ -3264,7 +3264,7 @@ V1205 21:44:04.608000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compil
 	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
 	                    bw_donated_idxs=None,
 	                    num_backward_tokens=0)
-V1205 21:44:04.610000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
+V1206 15:24:50.520000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
@@ -3287,128 +3287,128 @@ V1205 21:44:04.610000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compil
 	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
 	            return ge
 	            
-V1205 21:44:04.612000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "da7d694995a9c795f03ff85c890c2f15"}
+V1206 15:24:50.521000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2386afea16835c738f6306784edc87e8"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463844612244.8,
+	"ts": 1733527490521683.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.612000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "60dd9d25253d14ffe764487407ae0760"}
+V1206 15:24:50.522000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "76ae4cce2f52ad7feca705fd70cb3c6c"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463844612800.2,
+	"ts": 1733527490522094.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.614000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2a978af9f9cdb9f4b82288267fd5333e"}
+V1206 15:24:50.523000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c0a313f319086d99c7003f86a39e8eb0"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463844614758.2,
+	"ts": 1733527490523142.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.615000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bb326d91116938249f60198bb926f31b"}
+V1206 15:24:50.523000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "51c50f90b4f3e9efaa73f45c2709dc93"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463844615174.0,
+	"ts": 1733527490523375.8,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.631000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "18cae919fd8e76e4b0081e4cc7242a3c"}
+V1206 15:24:50.531000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "30b00efd4f6faf8b068719e99390e687"}
 	{
 	"name": "TritonBundler.read_and_emit",
-	"ts": 1733463844631295.8,
+	"ts": 1733527490531851.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.631000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "62d8a6fac44271a38bc943cc5b599b2e"}
+V1206 15:24:50.532000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d06401ea7dbeea897c26573b7848e594"}
 	{
 	"name": "TritonBundler.read_and_emit",
-	"ts": 1733463844631911.8,
+	"ts": 1733527490532212.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.632000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "80b361698c7864b0c3b2d03d75f26c65"}
+V1206 15:24:50.532000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e60ce588f8a8da2d850b98d827e6543e"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463844632645.5,
+	"ts": 1733527490532650.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.656000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d98b786d051bead708a68d7d015a4331"}
+V1206 15:24:50.545000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d4c6869eeaa9ef14de6b518a45d9c651"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463844656377.8,
+	"ts": 1733527490545938.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.748000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1c256a31f9a9ecb13fc2ef70a3bc8bcf"}
+V1206 15:24:50.599000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a05612636136909ead93ebf04e1a42ad"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463844748061.2,
+	"ts": 1733527490599093.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.748000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c77daf5f292bead4d2bfcc2972efa7de"}
+V1206 15:24:50.599000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "eb0838b082ff5e829a02f8ce1aae27cf"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463844748657.5,
+	"ts": 1733527490599434.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.749000 1657128 torch/_inductor/codecache.py:1267] {"inductor_output_code": {"filename": "/tmp/tmph3za5c24/vs/cvs4vumksf6bwll3igphroshjevbea7jp53cxywxrhliugsk43fm.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0aa5b4706bd4357e344ccb8187abb38c"}
+V1206 15:24:50.599000 1667746 torch/_inductor/codecache.py:1267] {"inductor_output_code": {"filename": "/tmp/tmpsn77tlsm/sm/csmb7f23sz2oa7mngdcoprpgrrbziqzirwhkjffdmxv6qhdf3e57.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9ced76d2b85dd84790793e85fcadcc28"}
 	# AOT ID: ['0_inference']
 	from ctypes import c_void_p, c_long, c_int
 	import torch
@@ -3450,7 +3450,7 @@ V1205 21:44:04.749000 1657128 torch/_inductor/codecache.py:1267] {"inductor_outp
 	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
 	
 	
-	# kernel path: /tmp/tmph3za5c24/wq/cwqhrcmg46q7si24xk2wvtwdqmnsixsarf7zy5e6poex5bl4gdlu.py
+	# kernel path: /tmp/tmpsn77tlsm/xo/cxokkamvqulf4elvqs25iez7wbmqqfbp3i4qy7g2hwvher2zrziz.py
 	# Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
 	# Source node to ATen node mapping:
 	#   flex_attention => flex_attention
@@ -3468,8 +3468,8 @@ V1205 21:44:04.749000 1657128 torch/_inductor/codecache.py:1267] {"inductor_outp
 	@triton_heuristics.template(
 	    num_stages=3,
 	    num_warps=4,
-	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=108, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
-	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '59CDC28C5AC44AE92A1C88C87D935A8E2ADCDA7AF532B6269756431229691604', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
+	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
 	)
 	@triton.jit
 	def triton_tem_fused_0(arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0):
@@ -3966,13 +3966,13 @@ V1205 21:44:04.749000 1657128 torch/_inductor/codecache.py:1267] {"inductor_outp
 	    from torch._inductor.wrapper_benchmark import compiled_module_main
 	    compiled_module_main('None', benchmark_compiled_module)
 	
-V1205 21:44:04.750000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "165bddf025a17c7b287f38ad1ae26347"}
+V1206 15:24:50.600000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0b77d8bc6ee36d773c74713ef29c7921"}
 	{
 	"name": "fx_graph_cache_hit",
-	"ts": 1733463844616361.8,
+	"ts": 1733527490524020.8,
 	"args": {
 	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=[])",
-	"key": "f3gi4pndnobstdl23se5doxewpvvcxdsjxmjk6hyx2eeiggheylt",
+	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
 	"components": [
 	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -4008,10 +4008,10 @@ V1205 21:44:04.750000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
@@ -4267,10 +4267,10 @@ V1205 21:44:04.750000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
 	],
-	"cache_event_time": 1733463844749651653,
+	"cache_event_time": 1733527490600011529,
 	"cache_state": "hit",
-	"time_saved_ns": 7665057603,
-	"compile_id": "-/1/0"
+	"time_saved_ns": 4009141776,
+	"compile_id": "1/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -4278,20 +4278,20 @@ V1205 21:44:04.750000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:44:04.750000 1657128 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_hit", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f582f01ad290f4bcb1ae3bb2c3e351ee"}
-	{"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=[])", "key": "f3gi4pndnobstdl23se5doxewpvvcxdsjxmjk6hyx2eeiggheylt", "components": ["[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), 
stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", 
"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>", "[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}", "[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}", "[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", 
"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", 
"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733463844749651653, "cache_state": "hit", "time_saved_ns": 7665057603, "compile_id": "-/1/0"}
-V1205 21:44:04.751000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7e5d1be7e4951f446261402748889cfb"}
+V1206 15:24:50.600000 1667746 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_hit", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d8cb8a6a0ac5b1d58d28337f3f1a055a"}
+	{"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=[])", "key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau", "components": ["[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), 
stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", 
"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: 
False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", 
"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] 
inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] 
inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527490600011529, "cache_state": "hit", "time_saved_ns": 4009141776, "compile_id": "1/0"}
+V1206 15:24:50.601000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "13cc116eca5abf2a6d7e2ad12c7d39c5"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463844751157.8,
+	"ts": 1733527490600831.8,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"is_backward": false,
 	"cached_kernel_names": [],
 	"cache_state": "hit",
-	"cache_event_time": 1733463844616361767,
-	"key": "f3gi4pndnobstdl23se5doxewpvvcxdsjxmjk6hyx2eeiggheylt",
+	"cache_event_time": 1733527490524020812,
+	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
 	"components": [
 	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -4327,10 +4327,10 @@ V1205 21:44:04.751000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
@@ -4595,41 +4595,41 @@ V1205 21:44:04.751000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.752000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6e22b36c5be57a0ceb466304376071a8"}
+V1206 15:24:50.601000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ad742b9e5576eedb50497df147df1721"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463844752267.0,
+	"ts": 1733527490601481.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.755000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7afded4e669bb432280d8943bef3aece"}
+V1206 15:24:50.603000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "cb401f4f44ce40d91fb40593e2e0fab8"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463844755747.8,
+	"ts": 1733527490603290.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.756000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e898dae130f5c55b490b6d3f43aaaee3"}
+V1206 15:24:50.603000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "034a66d21d6ca4969f8613317ccf781b"}
 	{
 	"name": "autograd_cache_bypass",
-	"ts": 1733463844508572.0,
+	"ts": 1733527490471931.2,
 	"args": {
 	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
 	"cache_bypass_hard_exception": false,
 	"key": null,
 	"cache_state": "bypass",
 	"components": [],
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -4637,19 +4637,19 @@ V1205 21:44:04.756000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:44:04.756000 1657128 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b55f3a3a16b369b568f062c1351f1185"}
-	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "-/1/0"}
-V1205 21:44:04.756000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "992bd492d36f789d2733f65a56d3ad82"}
+V1206 15:24:50.603000 1667746 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5601d02186053adcc1ba29fd248c1d20"}
+	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "1/0"}
+V1206 15:24:50.603000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bd634b54ab5138da3c38e2434aae7337"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844756810.5,
+	"ts": 1733527490603908.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"requires_subclass_dispatch": false,
 	"dispatch_mode": "inference",
 	"cache_state": "bypass",
-	"cache_event_time": 1733463844508571899,
+	"cache_event_time": 1733527490471931164,
 	"key": null,
 	"components": [],
 	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
@@ -4661,7 +4661,7 @@ V1205 21:44:04.756000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d09f94c672a71bf0049243dada45a217"}
+V1206 15:24:50.618000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "771ab34a6bd9546eb139960f16071592"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -4669,21 +4669,21 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| +- GLOBAL_STATE: ___check_global_state()
 	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
 	| +- GuardManager: source=L['k'], accessed_by=DictGetItemGuardAccessor('k')
-	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['k'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['k'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['k'], L['q'], L['v'], L['block_mask'].q_indices, L['block_mask'].kv_indices, L['block_mask'].q_num_blocks, L['block_mask'].kv_num_blocks, L['block_mask'].full_q_indices, L['block_mask'].full_kv_indices, L['block_mask'].full_q_num_blocks, L['block_mask'].full_kv_num_blocks)
 	| | +- GuardManager: source=L['k'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['k'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['q'], accessed_by=DictGetItemGuardAccessor('q')
-	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['q'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['q'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['q'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['q'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['v'], accessed_by=DictGetItemGuardAccessor('v')
-	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['v'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['v'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING
@@ -4691,12 +4691,12 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- GuardManager: source=L['v'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['score_mod'], accessed_by=DictGetItemGuardAccessor('score_mod')
 	| | +- GuardManager: source=L['score_mod'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | +- ID_MATCH: ___check_obj_id(L['score_mod'].__code__, 139667217695600)   
+	| | | +- ID_MATCH: ___check_obj_id(L['score_mod'].__code__, 140062268556144)   
 	| +- GuardManager: source=L['block_mask'], accessed_by=DictGetItemGuardAccessor('block_mask')
-	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 396207408)                
+	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 139679664)                
 	| | +- GuardManager: source=L['block_mask'].mask_mod, accessed_by=GetAttrGuardAccessor(mask_mod)
 	| | | +- GuardManager: source=L['block_mask'].mask_mod.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 139667217696224)
+	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 140062268556768)
 	| | +- GuardManager: source=L['block_mask'].q_indices, accessed_by=GetAttrGuardAccessor(q_indices)
 	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
 	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_indices, '_dynamo_dynamic_indices') == False
@@ -4742,7 +4742,7 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].as_tuple.__defaults__[0], 8908032)
 	| +- GuardManager: source=L['flex_attention'], accessed_by=DictGetItemGuardAccessor('flex_attention')
 	| | +- GuardManager: source=L['flex_attention'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 397452288)    
+	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 139318784)    
 	| | +- GuardManager: source=L['flex_attention'], accessed_by=FuncDefaultsGuardAccessor
 	| | | +- GuardManager: source=L['flex_attention'].__defaults__[2], accessed_by=GetItemGuardAccessor(2)
 	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[2], 8822752)
@@ -4753,17 +4753,17 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=L['flex_attention'].__defaults__[5], accessed_by=GetItemGuardAccessor(5)
 	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[5], 8822752)
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
-	| | +- GuardManager: source=G['_139667213076496_c3'], accessed_by=DictGetItemGuardAccessor('_139667213076496_c3')
-	| | | +- GuardManager: source=G['_139667213076496_c3'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
-	| | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c3'].Tensor, 82291104)  
-	| | | | +- GuardManager: source=G['_139667213076496_c3'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
-	| | | | | +- GuardManager: source=G['_139667213076496_c3'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c3'].Tensor.__bases__[0], 139667196393888)
+	| | +- GuardManager: source=G['_140062263790704_c3'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c3')
+	| | | +- GuardManager: source=G['_140062263790704_c3'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
+	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c3'].Tensor, 82181376)  
+	| | | | +- GuardManager: source=G['_140062263790704_c3'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
+	| | | | | +- GuardManager: source=G['_140062263790704_c3'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c3'].Tensor.__bases__[0], 140062119703136)
 	| | +- GuardManager: source=G['__builtins_dict___6'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___6')
 	| | | +- GuardManager: source=G['__builtins_dict___6']['len'], accessed_by=DictGetItemGuardAccessor('len')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['len'], 139667218584480)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['len'], 140062269592480)
 	| | | +- GuardManager: source=G['__builtins_dict___6']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['sum'], 139667218585600)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['sum'], 140062269593600)
 	| | | +- GuardManager: source=G['__builtins_dict___6']['list'], accessed_by=DictGetItemGuardAccessor('list')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['list'], 8841312)  
 	| | | +- GuardManager: source=G['__builtins_dict___6']['type'], accessed_by=DictGetItemGuardAccessor('type')
@@ -4773,16 +4773,16 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__builtins_dict___6']['object'], accessed_by=DictGetItemGuardAccessor('object')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['object'], 8810976)
 	| | | +- GuardManager: source=G['__builtins_dict___6']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['isinstance'], 139667218584160)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['isinstance'], 140062269592160)
 	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 139665266915776)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 87414528)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 139665266954544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87437744)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
 	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
@@ -4798,31 +4798,31 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 139665266954272)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
 	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
-	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 431080
+	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
 	| | | | +- KeyValueManager pair at index=1
 	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 139665266871808)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 139665266552240)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 139665266608368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 139665266607920)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
 	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_comptime'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_comptime')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 139664219014784)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 140057359527872)
 	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_decorators')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 139664218897952)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 140057359526192)
 	| | | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 139664343945664)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 140057422814624)
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot__utils')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 139663431923360)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 140048551571072)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, accessed_by=GetAttrGuardAccessor(_SUPPORTED_HEAD_DIMS)
 	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, 8841312)
 	| | | | +- LENGTH_CHECK: len(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS) == 10
@@ -4839,67 +4839,67 @@ V1205 21:44:04.789000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5], accessed_by=ListGetItemGuardAccessor(5)
 	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5] == 64
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot_flex_attention')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 139663431922960)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 140048551568912)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, accessed_by=GetAttrGuardAccessor(math)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 139667216591312)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 140062267404384)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 139667216595152)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 140062267408144)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, accessed_by=GetAttrGuardAccessor(torch)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 139667213076496)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 140062263790704)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 139667211310544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 140062262057760)
 	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static, accessed_by=GetAttrGuardAccessor(mark_static)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 107696464)
+	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 125680000)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 139664804054656)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 140057571877776)
 	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, accessed_by=GetAttrGuardAccessor(is_dynamo_compiling)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 139664804198208)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 140057572232544)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, accessed_by=GetAttrGuardAccessor(is_grad_enabled)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 139667201822368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 140062253115936)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device, accessed_by=GetAttrGuardAccessor(_validate_device)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 139663431699312)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 140048552018544)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, accessed_by=GetAttrGuardAccessor(flex_attention_hop)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 97810240)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 96230624)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__, accessed_by=GetAttrGuardAccessor(__name__)
 	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__ == 'flex_attention'
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim, accessed_by=GetAttrGuardAccessor(_supported_head_dim)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 139663431174224)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 140048552864992)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim, accessed_by=GetAttrGuardAccessor(_validate_embed_dim)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 395883648)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 139781872)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness, accessed_by=GetAttrGuardAccessor(_validate_nestedness)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 139663466500208)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 140048553100560)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input, accessed_by=GetAttrGuardAccessor(_validate_sdpa_input)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 76825296)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 139823744)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options, accessed_by=GetAttrGuardAccessor(_apply_kernel_options)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 139663477636016)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 140048554555888)
 	
-V1205 21:44:04.789000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5b0134fe6919376ebe37ca312701037d"}
+V1206 15:24:50.619000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b414209b816a551182c3cbb3ebebb2ce"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844789731.0,
+	"ts": 1733527490619189.8,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.794000 1657128 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 935, "joint_graph_pass_time_us": 1958, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true}, "distributed_ephemeral_timeout_us": 7665057, "inductor_cumulative_compile_time_us": 135983, "aot_autograd_cumulative_compile_time_us": 254042, "dynamo_cumulative_compile_time_us": 466719, "frame_key": "2", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 459, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, 
\"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.795000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "49bbcecf05f7feb01cbf7dc9bfbe34cc"}
+V1206 15:24:50.621000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 521, "joint_graph_pass_time_us": 1048, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true}, "distributed_ephemeral_timeout_us": 4009141, "inductor_cumulative_compile_time_us": 77456, "aot_autograd_cumulative_compile_time_us": 134960, "dynamo_cumulative_compile_time_us": 242984, "frame_key": "2", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 459, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, 
\"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.622000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c94fa0f0ebcc0c15f083237f9bd40372"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844794912.5,
+	"ts": 1733527490621989.8,
 	"args": {
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"frame_key": "2",
 	"co_name": "fn",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py",
@@ -4920,73 +4920,73 @@ V1205 21:44:04.795000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.797000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "85c5b6dd7c525d3f361dcf7b84385e42"}
+V1206 15:24:50.623000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f6ab5cb1835b6c744bb2f21749f1693d"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844797522.0,
+	"ts": 1733527490623683.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.798000 1657128 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 490, "name": "test_flex_attention_caching", "filename": 1}, {"line": 466, "name": "fn2", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.798000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e9142a620faf0f7c55ab31dfca573a3e"}
+V1206 15:24:50.624000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 490, "name": "test_flex_attention_caching", "filename": 1}, {"line": 466, "name": "fn2", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.624000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0a08617336a65119a060bbb5cc88acf6"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844798572.0,
+	"ts": 1733527490624241.5,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.801000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.801000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb10>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.801000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.920000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.921000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb70>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.921000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 2, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.922000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.923000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec90>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.923000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 3, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.924000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.924000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cd10>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.924000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 4, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.926000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.926000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ecf0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.926000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 5, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.927000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.928000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec30>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.928000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 6, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.929000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.929000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ed50>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.930000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 7, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.931000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.931000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ce30>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.931000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 8, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.932000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.933000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eff0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.933000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 9, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.934000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.934000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cdd0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.935000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 10, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.936000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.936000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96f050>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.936000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 11, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.945000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9bb6d56ee069d3045a9d8d21bcfdcd31"}
+V1206 15:24:50.625000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.626000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.626000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.686000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.686000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.686000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 2, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.687000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.687000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.687000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 3, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 4, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 5, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 6, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.690000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.690000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.690000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 7, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 8, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 9, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 10, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.693000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.693000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.693000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 11, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.697000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9bb6d56ee069d3045a9d8d21bcfdcd31"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_args_0_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_1_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_2_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_4_0_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_1_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_2_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_3_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_4_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_5_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_6_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_7_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	        l_args_0_ = L_args_0_
@@ -5019,33 +5019,33 @@ V1205 21:44:04.945000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
 	            return ge
 	            
-V1205 21:44:04.945000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "eb32d3aac910bc58506398ff0d803e22"}
+V1206 15:24:50.698000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c8302b73abb12ac1bce37cef0c47d244"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844945843.0,
+	"ts": 1733527490698251.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.946000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4a476cec5f6063cdcc0621f1fa8418d2"}
+V1206 15:24:50.698000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6b9dbcf1651e5e99de517f92d22baa7b"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463844946333.0,
+	"ts": 1733527490698529.8,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.970000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3cdd8f830c81d0d39406165a148ed1ca"}
+V1206 15:24:50.709000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a32f9935e2d4f1dc06e2de195d9c24b2"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -5069,7 +5069,7 @@ V1205 21:44:04.970000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['args'][3], accessed_by=TupleGetItemGuardAccessor(3)
 	| | | +- GuardManager: source=L['args'][3].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 139667218246576)     
+	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 140062269255152)     
 	| | +- GuardManager: source=L['args'][4], accessed_by=TupleGetItemGuardAccessor(4)
 	| | | +- TYPE_MATCH: ___check_type_id(L['args'][4], 8812224)                     
 	| | | +- LENGTH_CHECK: len(L['args'][4]) == 11                                     
@@ -5111,7 +5111,7 @@ V1205 21:44:04.970000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- EQUALS_MATCH: L['args'][4][9] == 128                                      
 	| | | +- GuardManager: source=L['args'][4][10], accessed_by=TupleGetItemGuardAccessor(10)
 	| | | | +- GuardManager: source=L['args'][4][10].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 139667217696224) 
+	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 140062268556768) 
 	| | +- GuardManager: source=L['args'][5], accessed_by=TupleGetItemGuardAccessor(5)
 	| | | +- EQUALS_MATCH: L['args'][5] == 0.125                                       
 	| | +- GuardManager: source=L['args'][6], accessed_by=TupleGetItemGuardAccessor(6)
@@ -5128,20 +5128,20 @@ V1205 21:44:04.970000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- DICT_LENGTH: not L['kwargs']                                             
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
 	| | +- GuardManager: source=G['flex_attention_hop'], accessed_by=DictGetItemGuardAccessor('flex_attention_hop')
-	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 97810240)         
+	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 96230624)         
 	| | | +- GuardManager: source=G['flex_attention_hop'].__name__, accessed_by=GetAttrGuardAccessor(__name__)
 	| | | | +- EQUALS_MATCH: G['flex_attention_hop'].__name__ == 'flex_attention'        
-	| | +- GuardManager: source=G['_139667213076496_c4'], accessed_by=DictGetItemGuardAccessor('_139667213076496_c4')
-	| | | +- GuardManager: source=G['_139667213076496_c4'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
-	| | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c4'].Tensor, 82291104)  
-	| | | | +- GuardManager: source=G['_139667213076496_c4'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
-	| | | | | +- GuardManager: source=G['_139667213076496_c4'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c4'].Tensor.__bases__[0], 139667196393888)
+	| | +- GuardManager: source=G['_140062263790704_c4'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c4')
+	| | | +- GuardManager: source=G['_140062263790704_c4'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
+	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c4'].Tensor, 82181376)  
+	| | | | +- GuardManager: source=G['_140062263790704_c4'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
+	| | | | | +- GuardManager: source=G['_140062263790704_c4'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c4'].Tensor.__bases__[0], 140062119703136)
 	| | +- GuardManager: source=G['__builtins_dict___8'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___8')
 	| | | +- GuardManager: source=G['__builtins_dict___8']['len'], accessed_by=DictGetItemGuardAccessor('len')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['len'], 139667218584480)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['len'], 140062269592480)
 	| | | +- GuardManager: source=G['__builtins_dict___8']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['sum'], 139667218585600)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['sum'], 140062269593600)
 	| | | +- GuardManager: source=G['__builtins_dict___8']['list'], accessed_by=DictGetItemGuardAccessor('list')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['list'], 8841312)  
 	| | | +- GuardManager: source=G['__builtins_dict___8']['type'], accessed_by=DictGetItemGuardAccessor('type')
@@ -5151,16 +5151,16 @@ V1205 21:44:04.970000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__builtins_dict___8']['object'], accessed_by=DictGetItemGuardAccessor('object')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['object'], 8810976)
 	| | | +- GuardManager: source=G['__builtins_dict___8']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['isinstance'], 139667218584160)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['isinstance'], 140062269592160)
 	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 139665266915776)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 87414528)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 139665266954544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87437744)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
 	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
@@ -5176,44 +5176,44 @@ V1205 21:44:04.970000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 139665266954272)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
 	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
-	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 431080
+	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
 	| | | | +- KeyValueManager pair at index=1
 	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 139665266871808)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 139665266552240)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 139665266608368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 139665266607920)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
 	
-V1205 21:44:04.971000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "77615c24e221e77d7d2762b0a69c0ec4"}
+V1206 15:24:50.710000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6d01a47ed965240f30333b7963f37f52"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844971118.5,
+	"ts": 1733527490710248.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.975000 1657128 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 489, "dynamo_cumulative_compile_time_us": 172546, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.976000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6bf2a026ece4fe2c3d9f8ac2823a9fec"}
+V1206 15:24:50.712000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 278, "dynamo_cumulative_compile_time_us": 86006, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.713000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bf4cb9e110f5e8f2910d1d589e140933"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844976149.5,
+	"ts": 1733527490713016.2,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "_flex_attention_hop_wrapper",
 	"co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py",
@@ -5234,73 +5234,73 @@ V1205 21:44:04.976000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.979000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2fcf8a783fbc992020a994b0f2a9439c"}
+V1206 15:24:50.715000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "34b5378127dd1adc3ac18035ff2352ab"}
 	{
 	"name": "dynamo",
-	"ts": 1733463844979394.0,
+	"ts": 1733527490715020.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.980000 1657128 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 490, "name": "test_flex_attention_caching", "filename": 1}, {"line": 465, "name": "fn2", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.980000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "93026161aaaaa29d1efb3b70a2199331"}
+V1206 15:24:50.715000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 490, "name": "test_flex_attention_caching", "filename": 1}, {"line": 465, "name": "fn2", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.715000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "514fa302b9c8e853a86097889a21966b"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463844980337.5,
+	"ts": 1733527490715568.8,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:04.982000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.983000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb10>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.983000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.994000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.995000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eb70>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.995000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.996000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.997000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec90>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:04.997000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.008000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.008000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cd10>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.008000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.135000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.136000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ecf0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.136000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 5, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.138000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.139000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ec30>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.139000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 6, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.141000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.141000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ed50>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.142000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 7, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.143000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.143000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96ce30>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.144000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 8, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.145000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.145000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96eff0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.146000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 9, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.147000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.147000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96cdd0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.148000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 10, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.149000 1657128 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.149000 1657128 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f05ec96f050>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.150000 1657128 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 11, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:05.157000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "482a8ec46bfbc33e69d27b2c7e75e43d"}
+V1206 15:24:50.716000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.717000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.717000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.723000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.723000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.723000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.724000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.724000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.724000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.729000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.730000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.730000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.795000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.795000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.796000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 5, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.796000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.797000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.797000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 6, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.798000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.798000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.798000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 7, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.799000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.799000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.799000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 8, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.800000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.800000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.800000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 9, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.801000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.801000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.801000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 10, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.802000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.802000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.802000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 11, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:50.806000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "482a8ec46bfbc33e69d27b2c7e75e43d"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_q_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_k_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_v_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_block_mask_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	        l_q_ = L_q_
@@ -5332,56 +5332,56 @@ V1205 21:44:05.157000 1657128 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
 	            return ge
 	            
-V1205 21:44:05.158000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4220009c4b03356ab8a4a73f81b603bc"}
+V1206 15:24:50.807000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1d40dde81d7850073106ae3cf2229e48"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463845158636.8,
+	"ts": 1733527490807005.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.159000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a8a8544cb7fbd0a31e8068afc85b349f"}
+V1206 15:24:50.807000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ca5c77d2199fb0261e09e1800f877003"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463845159119.5,
+	"ts": 1733527490807277.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.160000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6c33c285e6363b8bff144cf678170edb"}
+V1206 15:24:50.807000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a8bfb8ba2a3fefc2b30a7110871d7e3a"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463845160107.8,
+	"ts": 1733527490807804.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.164000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fbab4ee801dac51b6b9bd11846ae7526"}
+V1206 15:24:50.810000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3b3ffc9d594503c9a35ba96fe32b6bde"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463845164805.8,
+	"ts": 1733527490809984.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.257000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
+V1206 15:24:50.852000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
 	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
 	                                              mutates_data=False,
 	                                              mutates_metadata=False,
@@ -5525,7 +5525,7 @@ V1205 21:44:05.257000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compil
 	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
 	                    bw_donated_idxs=None,
 	                    num_backward_tokens=0)
-V1205 21:44:05.259000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ecc276ab6d6e35dc44c5f2bbc091ace0"}
+V1206 15:24:50.853000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ecc276ab6d6e35dc44c5f2bbc091ace0"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
@@ -5545,56 +5545,56 @@ V1205 21:44:05.259000 1657128 torch/_functorch/_aot_autograd/dispatch_and_compil
 	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
 	            return ge
 	            
-V1205 21:44:05.260000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "601fa874e377d432de2e45288d9c7d43"}
+V1206 15:24:50.853000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "de5dcb5ec710567996f5f45241f82616"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463845260815.5,
+	"ts": 1733527490853953.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.261000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8e612c1a5d601df3b801ed7f93c19d6c"}
+V1206 15:24:50.854000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f7192ebd762dae9c8cf2d7972d6bf48e"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463845261377.8,
+	"ts": 1733527490854257.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.263000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3056c2aa2174deae40dd7963ed1b24d8"}
+V1206 15:24:50.855000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bfb4a0e07ad2c3524d5a0a3c69449196"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463845263406.5,
+	"ts": 1733527490855286.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.263000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bbec453bf58acec5937668e5d2bb2845"}
+V1206 15:24:50.855000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8d3b095b4ae2ad23dfdd31e5633971b0"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463845263838.8,
+	"ts": 1733527490855525.2,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.279000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7bb73344f8ffc5ea0484b87c812cde68"}
+V1206 15:24:50.863000 1667746 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2685e2fe8ae1c10315e3657921ebac7a"}
 	
 	import torch
 	from torch import tensor, device
@@ -5611,10 +5611,10 @@ V1205 21:44:05.279000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	torch._dynamo.config.accumulated_cache_size_limit = 256
 	torch._dynamo.config.traceable_tensor_subclasses = set()
 	torch._dynamo.config.suppress_errors = False
-	torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch._refs', 'torch.testing', 'torch._decomp', 'torch.distributions', 'torch._prims'}
+	torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch._decomp', 'torch.testing', 'torch._prims', 'torch._refs', 'torch.distributions'}
 	torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
 	torch._dynamo.config.raise_on_ctx_manager_usage = True
-	torch._dynamo.config._save_config_ignore = {'skipfiles_inline_module_allowlist', 'repro_level', 'constant_functions', 'repro_after'}
+	torch._dynamo.config._save_config_ignore = {'repro_after', 'skipfiles_inline_module_allowlist', 'repro_level', 'constant_functions'}
 	torch._dynamo.config.log_compilation_metrics = False
 	torch._dynamo.config.reorderable_logging_functions = set()
 	torch._dynamo.config._autograd_backward_strict_mode_banned_ops = ['stride', 'requires_grad', 'storage_offset', 'layout', 'data', 'is_coalesced', 'is_complex', 'is_conj', 'is_contiguous', 'is_cpu', 'is_cuda', 'is_distributed', 'is_floating_point', 'is_inference', 'is_ipu', 'is_leaf', 'is_maia', 'is_meta', 'is_mkldnn', 'is_mps', 'is_mtia', 'is_neg', 'is_nested', 'is_nonzero', 'is_pinned', 'is_quantized', 'is_same_size', 'is_set_to', 'is_shared', 'is_signed', 'is_sparse', 'is_sparse_csr', 'is_vulkan', 'is_xla', 'is_xpu']
@@ -5650,20 +5650,20 @@ V1205 21:44:05.279000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	
 	
 	
-	# torch version: 2.6.0a0+git5f4afda
-	# torch cuda version: 12.1
-	# torch git version: 5f4afda82a5a7a708effa35379140b88511b1f5f
+	# torch version: 2.6.0a0+giteece9ec
+	# torch cuda version: 12.2
+	# torch git version: eece9ecd62cae84bc2f915fc48cffe43e30256aa
 	
 	
 	# CUDA Info: 
 	# nvcc: NVIDIA (R) Cuda compiler driver 
 	# Copyright (c) 2005-2023 NVIDIA Corporation 
-	# Built on Mon_Apr__3_17:16:06_PDT_2023 
-	# Cuda compilation tools, release 12.1, V12.1.105 
-	# Build cuda_12.1.r12.1/compiler.32688072_0 
+	# Built on Tue_Aug_15_22:02:13_PDT_2023 
+	# Cuda compilation tools, release 12.2, V12.2.140 
+	# Build cuda_12.2.r12.2/compiler.33191640_0 
 	
 	# GPU Hardware Info: 
-	# NVIDIA PG509-210 : 8 
+	# NVIDIA H100 : 8 
 	
 	
 	from torch.nn import *
@@ -5714,31 +5714,31 @@ V1205 21:44:05.279000 1657128 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	        # To run it separately, do 
 	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
 	        # mod(*args)
-V1205 21:44:05.285000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "501d990e3627cea59e8d48dc397dc200"}
+V1206 15:24:50.866000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dfa5a083adca30c7330158b04eac504c"}
 	{
 	"name": "_recursive_post_grad_passes",
-	"ts": 1733463845285489.2,
+	"ts": 1733527490866428.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.287000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2ccb183160410f0fa5812173a8dd2f16"}
+V1206 15:24:50.867000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "92fea8d9f2c2af6ab5a7b7726a25e9c7"}
 	{
 	"name": "_recursive_post_grad_passes",
-	"ts": 1733463845287365.2,
+	"ts": 1733527490867486.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.290000 1657128 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ecc276ab6d6e35dc44c5f2bbc091ace0"}
+V1206 15:24:50.868000 1667746 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ecc276ab6d6e35dc44c5f2bbc091ace0"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
 	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
@@ -5758,176 +5758,176 @@ V1205 21:44:05.290000 1657128 torch/_inductor/compile_fx.py:898] {"inductor_post
 	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
 	            return ge
 	            
-V1205 21:44:05.291000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "cae89e19dd2fba2d1bb83dd53378c18c"}
+V1206 15:24:50.869000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2c4e2b8ec85ec846c7bfb5687ddbf4eb"}
 	{
 	"name": "GraphLowering.run",
-	"ts": 1733463845291509.8,
+	"ts": 1733527490869198.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.326000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "92dee6120c198dfb5bcc6712be12f6a4"}
+V1206 15:24:50.889000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4cf953068399b7703999c88a61b534a8"}
 	{
 	"name": "GraphLowering.run",
-	"ts": 1733463845326283.5,
+	"ts": 1733527490889235.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.326000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5d00e83149a19426752dc5e0d10d5da7"}
+V1206 15:24:50.889000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8512f5af880647d00bdcc6cb55c979b2"}
 	{
 	"name": "GraphLowering.compile_to_fn",
-	"ts": 1733463845326923.0,
+	"ts": 1733527490889665.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.327000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f8f5cb2948c0071225be48cf55f54ada"}
+V1206 15:24:50.889000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ba5d98c4ab6385588c824e9ba44d02fe"}
 	{
 	"name": "code_gen",
-	"ts": 1733463845327383.5,
+	"ts": 1733527490889960.8,
 	"args": {
 	"fn_name": "GraphLowering.compile_to_module",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.327000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2e5b426eb00d57fb2bff4f69c9fd6d45"}
+V1206 15:24:50.890000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d2916632a86a0b07c4acc6705aa100b6"}
 	{
 	"name": "GraphLowering.codegen",
-	"ts": 1733463845327788.5,
+	"ts": 1733527490890197.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.329000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "38fcf442232cbdbf98f8248aa52774d0"}
+V1206 15:24:50.891000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e0af71ca8453aaa26c1d6ae93f92a7e0"}
 	{
 	"name": "Scheduler.__init__",
-	"ts": 1733463845329192.5,
+	"ts": 1733527490891018.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.331000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dd718b61c0701955c8849db775ac739b"}
+V1206 15:24:50.892000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6c816233fc4897d4d8763de32b6039c5"}
 	{
 	"name": "Scheduler.fused_nodes",
-	"ts": 1733463845331634.2,
+	"ts": 1733527490892516.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.332000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "006ab8395d463c8abd4a1ffb97535142"}
+V1206 15:24:50.892000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "398867777111a34eaebfec44bc9bc0e0"}
 	{
 	"name": "Scheduler.fused_nodes",
-	"ts": 1733463845332147.2,
+	"ts": 1733527490892802.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.336000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "230de525b07acb49acef59d3843da496"}
+V1206 15:24:50.895000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4348580c03d299cf39e06d0f77951243"}
 	{
 	"name": "Scheduler.__init__",
-	"ts": 1733463845336557.2,
+	"ts": 1733527490895243.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.336000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "34ea4e3cb5f5a7b287b3c27423a3ecdc"}
+V1206 15:24:50.895000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "37369aaefd17f9a63be8fcd31ae1e98c"}
 	{
 	"name": "Scheduler.codegen",
-	"ts": 1733463845336905.2,
+	"ts": 1733527490895454.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.344000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8c2b03946b84aaa19281c516151f39bc"}
+V1206 15:24:50.900000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1b9346463f71bc6d13a662a5be2be40b"}
 	{
 	"name": "Scheduler.codegen",
-	"ts": 1733463845344893.0,
+	"ts": 1733527490900442.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.345000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "931bd2dac3bbc46108d46d2d193208ce"}
+V1206 15:24:50.900000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "54ce92e7f7ba26f1e39c7094e3194d16"}
 	{
 	"name": "PythonWrapperCodegen.generate",
-	"ts": 1733463845345236.5,
+	"ts": 1733527490900664.2,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.347000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a1c2ec50dd2288ec66020dc5d27aaa7a"}
+V1206 15:24:50.901000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "505184b3d58c91c853c86f14cf8b9246"}
 	{
 	"name": "PythonWrapperCodegen.generate",
-	"ts": 1733463845347473.5,
+	"ts": 1733527490901874.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.347000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "043ca749a7682c337a4fbf667bfa3409"}
+V1206 15:24:50.902000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fac85554adaad8ec3734fccc5ed505af"}
 	{
 	"name": "GraphLowering.codegen",
-	"ts": 1733463845347861.8,
+	"ts": 1733527490902086.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.348000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmph3za5c24/ep/cep74gpsx44qzaq6lxihnqloij77u3l3nqcr4w3lgh7nyqwfwo5t.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f113446c31695a9ac0252e8d43827348"}
+V1206 15:24:50.902000 1667746 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmpsn77tlsm/gg/cgg6gukzbkegr5nqxvu5c6limjypanrlgt5z3sah5y2lzsjiugp7.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "45e11dd29b32835a24d78e4a6ab62a07"}
 	# AOT ID: ['2_inference']
 	from ctypes import c_void_p, c_long, c_int
 	import torch
@@ -5969,7 +5969,7 @@ V1205 21:44:05.348000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_c
 	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
 	
 	
-	# kernel path: /tmp/tmph3za5c24/t2/ct2z5axmojizps5hoe6ruvggk65mesyvpux677c2jus4jvcoaeqr.py
+	# kernel path: /tmp/tmpsn77tlsm/4s/c4sfc3fngwlnbougmdi6kqtjf5vlgrowumdc7a2kkh5gxxav655m.py
 	# Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
 	# Source node to ATen node mapping:
 	#   flex_attention => flex_attention
@@ -5987,8 +5987,8 @@ V1205 21:44:05.348000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_c
 	@triton_heuristics.template(
 	    num_stages=3,
 	    num_warps=4,
-	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=108, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
-	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '59CDC28C5AC44AE92A1C88C87D935A8E2ADCDA7AF532B6269756431229691604', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
+	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
 	)
 	@triton.jit
 	def triton_tem_fused_0(arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0):
@@ -6482,109 +6482,109 @@ V1205 21:44:05.348000 1657128 torch/_inductor/graph.py:2030] {"inductor_output_c
 	    from torch._inductor.wrapper_benchmark import compiled_module_main
 	    compiled_module_main('None', benchmark_compiled_module)
 	
-V1205 21:44:05.349000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ff5efcee6af8004c58e62e48a9f0750d"}
+V1206 15:24:50.902000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7466f2d4d61dfc1690516495f37138a1"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463845349258.8,
+	"ts": 1733527490902875.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:05.373000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ad5c1c9ed509357e5cee9a03b7367e51"}
+V1206 15:24:50.916000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c77ba023033616cfe1f998da2fef4de2"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463845372938.5,
+	"ts": 1733527490916172.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.250000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a869d18274efbdb665c7757666956213"}
+V1206 15:24:54.126000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "07f6598362ca233fcf839b83bd69ac34"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463851250693.5,
+	"ts": 1733527494126241.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.251000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b29354e848e57e8fa3e91816d1d32414"}
+V1206 15:24:54.126000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7a197b62b8d8d93579b42512d9f7a21f"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463851251218.8,
+	"ts": 1733527494126712.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.251000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8d5575f4a6078756ec9a4e84d6e92044"}
+V1206 15:24:54.127000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "31dde7ddbbe82ef40947971a84c580bc"}
 	{
 	"name": "code_gen",
-	"ts": 1733463851251703.2,
+	"ts": 1733527494127082.2,
 	"args": {
 	"fn_name": "GraphLowering.compile_to_module",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.252000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7f8df70cc5cd9d34d9a39ad4d375a5fc"}
+V1206 15:24:54.127000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e32bbd039d869c1f2168335fbd92d76f"}
 	{
 	"name": "GraphLowering.compile_to_fn",
-	"ts": 1733463851252177.5,
+	"ts": 1733527494127503.0,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.253000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "027f0d317f101785aac9f8145ded7c96"}
+V1206 15:24:54.128000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fcdee348cde25167c18c8dcbcf951830"}
 	{
 	"name": "TritonBundler.collect",
-	"ts": 1733463851253281.8,
+	"ts": 1733527494128406.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.255000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3fdc10feb9eb89d0da0cc70baefa2481"}
+V1206 15:24:54.129000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4c9ec5066284075c872b557d2cf8b5e2"}
 	{
 	"name": "TritonBundler.collect",
-	"ts": 1733463851255443.0,
+	"ts": 1733527494129814.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.268000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "27622ad4bb86d565f3befdbdc6eebd26"}
+V1206 15:24:54.134000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ab69c518495e835b07c9f8fc02777882"}
 	{
 	"name": "fx_graph_cache_miss",
-	"ts": 1733463845265316.5,
+	"ts": 1733527490856164.5,
 	"args": {
-	"key": "floiojpqilmuqtq332gt66ewtqzessshp5va2kwxmofxdpgoysi7",
+	"key": "fmjzafxtfmjyhhkl3afwwy3gjat2obuydjrd6epcvjw6l5s2v7yd",
 	"components": [
 	"[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -6620,10 +6620,10 @@ V1205 21:44:11.268000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
@@ -6879,11 +6879,11 @@ V1205 21:44:11.268000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
 	],
-	"cache_event_time": 1733463845278101993,
+	"cache_event_time": 1733527490862706044,
 	"cache_state": "miss",
 	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])",
-	"time_taken_ns": 5987933926,
-	"compile_id": "-/1/0"
+	"time_taken_ns": 3272216042,
+	"compile_id": "1/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -6891,19 +6891,19 @@ V1205 21:44:11.268000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:44:11.268000 1657128 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c284d8d616fa36e6dff8f27c20993e41"}
-	{"key": "floiojpqilmuqtq332gt66ewtqzessshp5va2kwxmofxdpgoysi7", "components": ["[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] 
inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>", "[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}", "[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}", "[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", 
"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] 
inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] 
inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733463845278101993, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])", "time_taken_ns": 5987933926, "compile_id": "-/1/0"}
-V1205 21:44:11.269000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c2c5bb6700f5d23f7dc10af732246465"}
+V1206 15:24:54.135000 1667746 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c9c893906731ce022aed26f875fcd820"}
+	{"key": "fmjzafxtfmjyhhkl3afwwy3gjat2obuydjrd6epcvjw6l5s2v7yd", "components": ["[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), 
layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] 
inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", 
"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] 
inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] 
inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527490862706044, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])", "time_taken_ns": 3272216042, "compile_id": "1/0"}
+V1206 15:24:54.135000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8448954b82f0bb83dace88546d83f2e6"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463851269207.2,
+	"ts": 1733527494135534.2,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"is_backward": false,
 	"cache_state": "miss",
-	"cache_event_time": 1733463845265316427,
-	"key": "floiojpqilmuqtq332gt66ewtqzessshp5va2kwxmofxdpgoysi7",
+	"cache_event_time": 1733527490856164558,
+	"key": "fmjzafxtfmjyhhkl3afwwy3gjat2obuydjrd6epcvjw6l5s2v7yd",
 	"components": [
 	"[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -6939,10 +6939,10 @@ V1205 21:44:11.269000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
@@ -7207,41 +7207,41 @@ V1205 21:44:11.269000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.270000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a504983f7abea1944a3885ce3f180204"}
+V1206 15:24:54.136000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a9d90840e9080e38cc5cc8f43d64b9e7"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463851270255.5,
+	"ts": 1733527494136361.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.273000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1912026b9a0896eb5de047f5bcec9c80"}
+V1206 15:24:54.138000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9174f1070d6248c4511951edfe3f1dd1"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463851273796.8,
+	"ts": 1733527494138570.5,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.274000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "da59610a1aee4acb535867a3bb44e9d6"}
+V1206 15:24:54.138000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "32e73583977fb2c6cb9b34dd32b9a8f7"}
 	{
 	"name": "autograd_cache_bypass",
-	"ts": 1733463845164784.5,
+	"ts": 1733527490809971.2,
 	"args": {
 	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
 	"cache_bypass_hard_exception": false,
 	"key": null,
 	"cache_state": "bypass",
 	"components": [],
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -7249,19 +7249,19 @@ V1205 21:44:11.274000 1657128 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:44:11.274000 1657128 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b55f3a3a16b369b568f062c1351f1185"}
-	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "-/1/0"}
-V1205 21:44:11.274000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2cc6e1483ccedffb7cde0f0b6156e154"}
+V1206 15:24:54.138000 1667746 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5601d02186053adcc1ba29fd248c1d20"}
+	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "1/0"}
+V1206 15:24:54.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "cca36f8b4f9f5ea8ad866b857a634eb8"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463851274807.8,
+	"ts": 1733527494139170.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"requires_subclass_dispatch": false,
 	"dispatch_mode": "inference",
 	"cache_state": "bypass",
-	"cache_event_time": 1733463845164784583,
+	"cache_event_time": 1733527490809971264,
 	"key": null,
 	"components": [],
 	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
@@ -7273,7 +7273,7 @@ V1205 21:44:11.274000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5bef7ee31f128d0e6d4f83a5abb88b12"}
+V1206 15:24:54.153000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b9091c16617d57e52682988042d2b9ea"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -7281,31 +7281,31 @@ V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| +- GLOBAL_STATE: ___check_global_state()
 	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
 	| +- GuardManager: source=L['k'], accessed_by=DictGetItemGuardAccessor('k')
-	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['k'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['k'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['k'], L['q'], L['v'], L['block_mask'].q_indices, L['block_mask'].kv_indices, L['block_mask'].q_num_blocks, L['block_mask'].kv_num_blocks, L['block_mask'].full_q_indices, L['block_mask'].full_kv_indices, L['block_mask'].full_q_num_blocks, L['block_mask'].full_kv_num_blocks)
 	| | +- GuardManager: source=L['k'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['k'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['q'], accessed_by=DictGetItemGuardAccessor('q')
-	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['q'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['q'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['q'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['q'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['v'], accessed_by=DictGetItemGuardAccessor('v')
-	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82291104)                          
+	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82181376)                          
 	| | +- TENSOR_MATCH: check_tensor(L['v'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
 	| | +- NO_HASATTR: hasattr(L['v'], '_dynamo_dynamic_indices') == False         
 	| | +- NO_TENSOR_ALIASING
 	| | +- GuardManager: source=L['v'].dim, accessed_by=GetAttrGuardAccessor(dim)
 	| | +- GuardManager: source=L['v'].size, accessed_by=GetAttrGuardAccessor(size)
 	| +- GuardManager: source=L['block_mask'], accessed_by=DictGetItemGuardAccessor('block_mask')
-	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 396207408)                
+	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 139679664)                
 	| | +- GuardManager: source=L['block_mask'].mask_mod, accessed_by=GetAttrGuardAccessor(mask_mod)
 	| | | +- GuardManager: source=L['block_mask'].mask_mod.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 139667217696224)
+	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 140062268556768)
 	| | +- GuardManager: source=L['block_mask'].q_indices, accessed_by=GetAttrGuardAccessor(q_indices)
 	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
 	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_indices, '_dynamo_dynamic_indices') == False
@@ -7351,10 +7351,10 @@ V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].as_tuple.__defaults__[0], 8908032)
 	| +- GuardManager: source=L['score_mod2'], accessed_by=DictGetItemGuardAccessor('score_mod2')
 	| | +- GuardManager: source=L['score_mod2'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | +- ID_MATCH: ___check_obj_id(L['score_mod2'].__code__, 139667218246576)  
+	| | | +- ID_MATCH: ___check_obj_id(L['score_mod2'].__code__, 140062269255152)  
 	| +- GuardManager: source=L['flex_attention'], accessed_by=DictGetItemGuardAccessor('flex_attention')
 	| | +- GuardManager: source=L['flex_attention'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 397452288)    
+	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 139318784)    
 	| | +- GuardManager: source=L['flex_attention'], accessed_by=FuncDefaultsGuardAccessor
 	| | | +- GuardManager: source=L['flex_attention'].__defaults__[2], accessed_by=GetItemGuardAccessor(2)
 	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[2], 8822752)
@@ -7365,17 +7365,17 @@ V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=L['flex_attention'].__defaults__[5], accessed_by=GetItemGuardAccessor(5)
 	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[5], 8822752)
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
-	| | +- GuardManager: source=G['_139667213076496_c5'], accessed_by=DictGetItemGuardAccessor('_139667213076496_c5')
-	| | | +- GuardManager: source=G['_139667213076496_c5'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
-	| | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c5'].Tensor, 82291104)  
-	| | | | +- GuardManager: source=G['_139667213076496_c5'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
-	| | | | | +- GuardManager: source=G['_139667213076496_c5'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['_139667213076496_c5'].Tensor.__bases__[0], 139667196393888)
+	| | +- GuardManager: source=G['_140062263790704_c5'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c5')
+	| | | +- GuardManager: source=G['_140062263790704_c5'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
+	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c5'].Tensor, 82181376)  
+	| | | | +- GuardManager: source=G['_140062263790704_c5'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
+	| | | | | +- GuardManager: source=G['_140062263790704_c5'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c5'].Tensor.__bases__[0], 140062119703136)
 	| | +- GuardManager: source=G['__builtins_dict___10'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___10')
 	| | | +- GuardManager: source=G['__builtins_dict___10']['len'], accessed_by=DictGetItemGuardAccessor('len')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['len'], 139667218584480)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['len'], 140062269592480)
 	| | | +- GuardManager: source=G['__builtins_dict___10']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['sum'], 139667218585600)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['sum'], 140062269593600)
 	| | | +- GuardManager: source=G['__builtins_dict___10']['list'], accessed_by=DictGetItemGuardAccessor('list')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['list'], 8841312) 
 	| | | +- GuardManager: source=G['__builtins_dict___10']['type'], accessed_by=DictGetItemGuardAccessor('type')
@@ -7385,16 +7385,16 @@ V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__builtins_dict___10']['object'], accessed_by=DictGetItemGuardAccessor('object')
 	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['object'], 8810976)
 	| | | +- GuardManager: source=G['__builtins_dict___10']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['isinstance'], 139667218584160)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['isinstance'], 140062269592160)
 	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 139665266915776)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 87414528)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 139665266954544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87437744)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
 	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
@@ -7410,31 +7410,31 @@ V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 139665266954272)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
 	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
-	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 431080
+	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
 	| | | | +- KeyValueManager pair at index=1
 	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 139665266871808)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
 	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 139665266552240)
+	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 139665266608368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
 	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
 	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 139665266607920)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
 	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_comptime'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_comptime')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 139664219014784)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 140057359527872)
 	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_decorators')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 139664218897952)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 140057359526192)
 	| | | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 139664343945664)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 140057422814624)
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot__utils')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 139663431923360)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 140048551571072)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, accessed_by=GetAttrGuardAccessor(_SUPPORTED_HEAD_DIMS)
 	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, 8841312)
 	| | | | +- LENGTH_CHECK: len(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS) == 10
@@ -7451,67 +7451,67 @@ V1205 21:44:11.304000 1657128 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5], accessed_by=ListGetItemGuardAccessor(5)
 	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5] == 64
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot_flex_attention')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 139663431922960)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 140048551568912)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, accessed_by=GetAttrGuardAccessor(math)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 139667216591312)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 140062267404384)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 139667216595152)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 140062267408144)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, accessed_by=GetAttrGuardAccessor(torch)
-	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 139667213076496)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 140062263790704)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 139667211310544)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 140062262057760)
 	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static, accessed_by=GetAttrGuardAccessor(mark_static)
 	| | | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 107696464)
+	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 125680000)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 139664804054656)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 140057571877776)
 	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, accessed_by=GetAttrGuardAccessor(is_dynamo_compiling)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 139664804198208)
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 140057572232544)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, accessed_by=GetAttrGuardAccessor(is_grad_enabled)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 139667201822368)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 140062253115936)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device, accessed_by=GetAttrGuardAccessor(_validate_device)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 139663431699312)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 140048552018544)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, accessed_by=GetAttrGuardAccessor(flex_attention_hop)
-	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 97810240)
+	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 96230624)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__, accessed_by=GetAttrGuardAccessor(__name__)
 	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__ == 'flex_attention'
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim, accessed_by=GetAttrGuardAccessor(_supported_head_dim)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 139663431174224)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 140048552864992)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim, accessed_by=GetAttrGuardAccessor(_validate_embed_dim)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 395883648)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 139781872)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness, accessed_by=GetAttrGuardAccessor(_validate_nestedness)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 139663466500208)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 140048553100560)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input, accessed_by=GetAttrGuardAccessor(_validate_sdpa_input)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 76825296)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 139823744)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options, accessed_by=GetAttrGuardAccessor(_apply_kernel_options)
 	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, accessed_by=GetAttrGuardAccessor(__code__)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 139663477636016)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 140048554555888)
 	
-V1205 21:44:11.305000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "67e872246c2cfcde3992446c7af0f6c4"}
+V1206 15:24:54.154000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f5553f623cc33ff3da54884bffba4841"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463851304987.2,
+	"ts": 1733527494154160.5,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:44:11.309000 1657128 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 988, "joint_graph_pass_time_us": 2028, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 1876, "inductor_code_gen_cumulative_compile_time_us": 5924319, "inductor_cumulative_compile_time_us": 6005368, "aot_autograd_cumulative_compile_time_us": 6116171, "dynamo_cumulative_compile_time_us": 6324649, "frame_key": "2", "co_name": "fn2", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 465, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, 
\"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:44:11.309000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "58fee530d6f84959fe54799b7e377709"}
+V1206 15:24:54.156000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 526, "joint_graph_pass_time_us": 1028, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 1058, "inductor_code_gen_cumulative_compile_time_us": 3237121, "inductor_cumulative_compile_time_us": 3280008, "aot_autograd_cumulative_compile_time_us": 3332164, "dynamo_cumulative_compile_time_us": 3438591, "frame_key": "2", "co_name": "fn2", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 465, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, 
\"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:24:54.156000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0fdda1350847e58c2900e8c2a60a9a2b"}
 	{
 	"name": "dynamo",
-	"ts": 1733463851309821.0,
+	"ts": 1733527494156938.5,
 	"args": {
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"frame_key": "2",
 	"co_name": "fn2",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py",
@@ -7532,10 +7532,10 @@ V1205 21:44:11.309000 1657128 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._decomp\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
-	}
\ No newline at end of file
+	}
diff --git a/tests/inputs/chromium_events.log b/tests/inputs/chromium_events.log
index e77db83..91bb76b 100644
--- a/tests/inputs/chromium_events.log
+++ b/tests/inputs/chromium_events.log
@@ -1,140 +1,140 @@
-V1205 20:51:09.480000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8a087f75e4429388fe8ab28c15d36dd4"}
+V1206 15:20:13.926000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bb6a3dc5077f86b72cd9a27f294c8718"}
 	{
 	"name": "dynamo",
-	"ts": 1733460669479996.0,
+	"ts": 1733527213926572.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 20:51:09.485000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/core/pytorch/torch/_dynamo/convert_frame.py", 0]}
-V1205 20:51:09.485000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/core/pytorch/test/dynamo/test_misc.py", 1]}
-V1205 20:51:09.486000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/core/pytorch/torch/_dynamo/test_case.py", 2]}
-V1205 20:51:09.486000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/core/pytorch/torch/testing/_internal/common_utils.py", 3]}
-V1205 20:51:09.486000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/anaconda3/lib/python3.12/unittest/main.py", 4]}
-V1205 20:51:09.487000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/anaconda3/lib/python3.12/unittest/runner.py", 5]}
-V1205 20:51:09.487000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/anaconda3/lib/python3.12/unittest/suite.py", 6]}
-V1205 20:51:09.487000 84489 torch/_logging/structured.py:22] {"str": ["/Users/xmfan/anaconda3/lib/python3.12/unittest/case.py", 7]}
-V1205 20:51:09.488000 84489 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 105, "name": "__init__", "filename": 4}, {"line": 281, "name": "runTests", "filename": 4}, {"line": 240, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 690, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 634, "name": "run", "filename": 7}, {"line": 589, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10765, "name": "test_graph_break_compilation_metrics_on_failure", "filename": 1}, {"line": 10755, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 20:51:09.488000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "69cd02253ebc47d4fa507d5b4d9d0842"}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", 1]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 2]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 3]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 4]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 5]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 6]}
+V1206 15:20:13.928000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 7]}
+V1206 15:20:13.928000 1543231 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10765, "name": "test_graph_break_compilation_metrics_on_failure", "filename": 1}, {"line": 10755, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:13.928000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2a27101ddf2d6616698cc3633c77fc3f"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733460669488491.0,
+	"ts": 1733527213928251.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 20:51:09.536000 84489 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 20:51:09.536000 84489 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x31e2c9220>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 20:51:09.536000 84489 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 20:51:09.784000 84489 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6d1fbf3b1d4ce646a78838e4ca2ca921"}
+V1206 15:20:13.931000 1543231 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:13.931000 1543231 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f2f42df5bb0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:13.931000 1543231 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:14.084000 1543231 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "289c7de0221d9f71a5513269eb25c1a2"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[4, 4][4, 1]cpu"):
 	        l_x_ = L_x_
 	        
-	         # File: /Users/xmfan/core/pytorch/test/dynamo/test_misc.py:10756 in fn, code: return x.sin()
+	         # File: /data/users/xmfan/a/pytorch/test/dynamo/test_misc.py:10756 in fn, code: return x.sin()
 	        sin: "f32[4, 4][4, 1]cpu" = l_x_.sin();  l_x_ = None
 	        return (sin,)
 	        
-V1205 20:51:09.785000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ee91207e6731de1ba24f062fcdeba7fc"}
+V1206 15:20:14.084000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7f609f2caa4686da5214135d7cf5c6fc"}
 	{
 	"name": "backend_compile",
-	"ts": 1733460669785589.0,
+	"ts": 1733527214084600.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 20:51:09.786000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f435a2e30e61269c60500090f7aa2f26"}
+V1206 15:20:14.085000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "09b2c72fc692bf5a0d9cac524e3ba68b"}
 	{
 	"name": "backend_compile",
-	"ts": 1733460669786130.0,
+	"ts": 1733527214085078.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 20:51:09.787000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3071feede47d43037efba7d957312b59"}
+V1206 15:20:14.085000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8eb8723e6aafa6856981ca893f6bc0c9"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733460669787053.0,
+	"ts": 1733527214085499.0,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 20:51:09.788000 84489 torch/_dynamo/convert_frame.py:1011] {"artifact": {"name": "dynamo_error", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f1a69e2df3665f30469651c7d13aa591"}
+V1206 15:20:14.086000 1543231 torch/_dynamo/convert_frame.py:1011] {"artifact": {"name": "dynamo_error", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8240b625682b3ef80eddf3423a91511c"}
 	Traceback (most recent call last):
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/convert_frame.py", line 989, in _compile
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 989, in _compile
 	    guarded_code = compile_inner(code, one_graph, hooks, transform)
 	                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/convert_frame.py", line 718, in compile_inner
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 718, in compile_inner
 	    return _compile_inner(code, one_graph, hooks, transform)
 	           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_utils_internal.py", line 95, in wrapper_function
+	  File "/data/users/xmfan/a/pytorch/torch/_utils_internal.py", line 95, in wrapper_function
 	    return function(*args, **kwargs)
 	           ^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/convert_frame.py", line 753, in _compile_inner
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 753, in _compile_inner
 	    out_code = transform_code_object(code, transform)
 	               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/bytecode_transformation.py", line 1361, in transform_code_object
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/bytecode_transformation.py", line 1361, in transform_code_object
 	    transformations(instructions, code_options)
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/convert_frame.py", line 231, in _fn
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 231, in _fn
 	    return fn(*args, **kwargs)
 	           ^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/convert_frame.py", line 665, in transform
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 665, in transform
 	    tracer.run()
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/symbolic_convert.py", line 2864, in run
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/symbolic_convert.py", line 2864, in run
 	    super().run()
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/symbolic_convert.py", line 1053, in run
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/symbolic_convert.py", line 1053, in run
 	    while self.step():
 	          ^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/symbolic_convert.py", line 963, in step
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/symbolic_convert.py", line 963, in step
 	    self.dispatch_table[inst.opcode](self, inst)
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/symbolic_convert.py", line 3044, in RETURN_VALUE
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/symbolic_convert.py", line 3044, in RETURN_VALUE
 	    self._return(inst)
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/symbolic_convert.py", line 3029, in _return
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/symbolic_convert.py", line 3029, in _return
 	    self.output.compile_subgraph(
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/output_graph.py", line 1085, in compile_subgraph
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/output_graph.py", line 1085, in compile_subgraph
 	    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/output_graph.py", line 1359, in compile_and_call_fx_graph
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/output_graph.py", line 1359, in compile_and_call_fx_graph
 	    compiled_fn = self.call_user_compiler(gm)
 	                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/output_graph.py", line 1409, in call_user_compiler
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/output_graph.py", line 1409, in call_user_compiler
 	    return self._call_user_compiler(gm)
 	           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/output_graph.py", line 1460, in _call_user_compiler
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/output_graph.py", line 1460, in _call_user_compiler
 	    raise BackendCompilerFailed(self.compiler_fn, e).with_traceback(
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/output_graph.py", line 1439, in _call_user_compiler
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/output_graph.py", line 1439, in _call_user_compiler
 	    compiled_fn = compiler_fn(gm, self.example_inputs())
 	                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/_dynamo/repro/after_dynamo.py", line 130, in __call__
+	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/repro/after_dynamo.py", line 130, in __call__
 	    compiled_gm = compiler_fn(gm, example_inputs)
 	                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/torch/__init__.py", line 2353, in __call__
+	  File "/data/users/xmfan/a/pytorch/torch/__init__.py", line 2353, in __call__
 	    return self.compiler_fn(model_, inputs_, **self.kwargs)
 	           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-	  File "/Users/xmfan/core/pytorch/test/dynamo/test_misc.py", line 10759, in broken_backend
+	  File "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", line 10759, in broken_backend
 	    raise RuntimeError("broken backend")
 	torch._dynamo.exc.BackendCompilerFailed: backend='broken_backend' raised:
 	RuntimeError: broken backend
@@ -142,16 +142,16 @@ V1205 20:51:09.788000 84489 torch/_dynamo/convert_frame.py:1011] {"artifact": {"
 	Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
 	
 	
-V1205 20:51:09.791000 84489 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 541, "dynamo_cumulative_compile_time_us": 298562, "frame_key": "1", "co_name": "fn", "co_filename": "/Users/xmfan/core/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10755, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "fail_type": "BackendCompilerFailed", "fail_reason": "backend='broken_backend' raised:\nRuntimeError: broken backend", "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.300835, "has_guarded_code": false, "config_suppress_errors": true, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._refs\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, 
\"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 300835}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 20:51:09.792000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f0107b2ef7db385bd145761ffe11f62e"}
+V1206 15:20:14.089000 1543231 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 478, "dynamo_cumulative_compile_time_us": 157247, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10755, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "fail_type": "BackendCompilerFailed", "fail_reason": "backend='broken_backend' raised:\nRuntimeError: broken backend", "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.158574662, "has_guarded_code": false, "config_suppress_errors": true, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._prims\", \"torch.testing\", \"torch.distributions\", \"torch._refs\", \"torch._decomp\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, 
\"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 158574}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:14.089000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "99d536cc3a0e4bcc211319f5a4656991"}
 	{
 	"name": "dynamo",
-	"ts": 1733460669792146.0,
+	"ts": 1733527214089673.0,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "fn",
-	"co_filename": "/Users/xmfan/core/pytorch/test/dynamo/test_misc.py",
+	"co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py",
 	"co_firstlineno": 10755,
 	"cache_size": 0,
 	"accumulated_cache_size": 0,
@@ -167,9 +167,9 @@ V1205 20:51:09.792000 84489 torch/_dynamo/utils.py:1288] {"chromium_event": {},
 	"non_compliant_ops": [],
 	"compliant_custom_ops": [],
 	"restart_reasons": [],
-	"dynamo_time_before_restart_s": 0.300835,
+	"dynamo_time_before_restart_s": 0.158574662,
 	"has_guarded_code": false,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._refs\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._prims\", \"torch.testing\", \"torch.distributions\", \"torch._refs\", \"torch._decomp\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
diff --git a/tests/inputs/comp_failure.log b/tests/inputs/comp_failure.log
index d180a26..91bb76b 100644
--- a/tests/inputs/comp_failure.log
+++ b/tests/inputs/comp_failure.log
@@ -1,41 +1,41 @@
-V1205 21:39:01.959000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "59ff85f09251ba086e5c3577a0fa3ffc"}
+V1206 15:20:13.926000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bb6a3dc5077f86b72cd9a27f294c8718"}
 	{
 	"name": "dynamo",
-	"ts": 1733463541959512.2,
+	"ts": 1733527213926572.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:39:01.960000 1608181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
-V1205 21:39:01.961000 1608181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", 1]}
-V1205 21:39:01.961000 1608181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 2]}
-V1205 21:39:01.961000 1608181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 3]}
-V1205 21:39:01.961000 1608181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 4]}
-V1205 21:39:01.961000 1608181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 5]}
-V1205 21:39:01.962000 1608181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 6]}
-V1205 21:39:01.962000 1608181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 7]}
-V1205 21:39:01.962000 1608181 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10765, "name": "test_graph_break_compilation_metrics_on_failure", "filename": 1}, {"line": 10755, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:39:01.962000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0b375225b3963b1c6a2a539c768b7171"}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", 1]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 2]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 3]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 4]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 5]}
+V1206 15:20:13.927000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 6]}
+V1206 15:20:13.928000 1543231 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 7]}
+V1206 15:20:13.928000 1543231 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10765, "name": "test_graph_break_compilation_metrics_on_failure", "filename": 1}, {"line": 10755, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:13.928000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2a27101ddf2d6616698cc3633c77fc3f"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463541962654.2,
+	"ts": 1733527213928251.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:39:01.966000 1608181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:39:01.967000 1608181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f2df2dabfb0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:39:01.967000 1608181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:39:02.398000 1608181 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "289c7de0221d9f71a5513269eb25c1a2"}
+V1206 15:20:13.931000 1543231 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:13.931000 1543231 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f2f42df5bb0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:13.931000 1543231 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:14.084000 1543231 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "289c7de0221d9f71a5513269eb25c1a2"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[4, 4][4, 1]cpu"):
 	        l_x_ = L_x_
@@ -44,46 +44,46 @@ V1205 21:39:02.398000 1608181 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	        sin: "f32[4, 4][4, 1]cpu" = l_x_.sin();  l_x_ = None
 	        return (sin,)
 	        
-V1205 21:39:02.399000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "fd54d33a53b412b118a4a922ba77c690"}
+V1206 15:20:14.084000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7f609f2caa4686da5214135d7cf5c6fc"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463542399410.2,
+	"ts": 1733527214084600.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:39:02.400000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "29d7b4a031db10b84e0280011a79aa30"}
+V1206 15:20:14.085000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "09b2c72fc692bf5a0d9cac524e3ba68b"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463542399974.0,
+	"ts": 1733527214085078.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:39:02.400000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "132ae0b41bd657127db459308240f685"}
+V1206 15:20:14.085000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8eb8723e6aafa6856981ca893f6bc0c9"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463542400620.2,
+	"ts": 1733527214085499.0,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:39:02.402000 1608181 torch/_dynamo/convert_frame.py:1011] {"artifact": {"name": "dynamo_error", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8240b625682b3ef80eddf3423a91511c"}
+V1206 15:20:14.086000 1543231 torch/_dynamo/convert_frame.py:1011] {"artifact": {"name": "dynamo_error", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8240b625682b3ef80eddf3423a91511c"}
 	Traceback (most recent call last):
 	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 989, in _compile
 	    guarded_code = compile_inner(code, one_graph, hooks, transform)
@@ -142,13 +142,13 @@ V1205 21:39:02.402000 1608181 torch/_dynamo/convert_frame.py:1011] {"artifact":
 	Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
 	
 	
-V1205 21:39:02.407000 1608181 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 563, "dynamo_cumulative_compile_time_us": 437965, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10755, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "fail_type": "BackendCompilerFailed", "fail_reason": "backend='broken_backend' raised:\nRuntimeError: broken backend", "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.440256695, "has_guarded_code": false, "config_suppress_errors": true, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch.distributions\", \"torch._prims\", \"torch._decomp\", \"torch.testing\", \"torch._refs\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 440256}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:39:02.407000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "173cb811b47cf60d035071a9fa5cd717"}
+V1206 15:20:14.089000 1543231 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 478, "dynamo_cumulative_compile_time_us": 157247, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10755, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "fail_type": "BackendCompilerFailed", "fail_reason": "backend='broken_backend' raised:\nRuntimeError: broken backend", "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.158574662, "has_guarded_code": false, "config_suppress_errors": true, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._prims\", \"torch.testing\", \"torch.distributions\", \"torch._refs\", \"torch._decomp\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 158574}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:20:14.089000 1543231 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "99d536cc3a0e4bcc211319f5a4656991"}
 	{
 	"name": "dynamo",
-	"ts": 1733463542407665.2,
+	"ts": 1733527214089673.0,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "fn",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py",
@@ -167,12 +167,12 @@ V1205 21:39:02.407000 1608181 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"non_compliant_ops": [],
 	"compliant_custom_ops": [],
 	"restart_reasons": [],
-	"dynamo_time_before_restart_s": 0.440256695,
+	"dynamo_time_before_restart_s": 0.158574662,
 	"has_guarded_code": false,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch.distributions\", \"torch._prims\", \"torch._decomp\", \"torch.testing\", \"torch._refs\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._prims\", \"torch.testing\", \"torch.distributions\", \"torch._refs\", \"torch._decomp\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
-	}
\ No newline at end of file
+	}
diff --git a/tests/inputs/comp_metrics.log b/tests/inputs/comp_metrics.log
index 99cd811..15ebec0 100644
--- a/tests/inputs/comp_metrics.log
+++ b/tests/inputs/comp_metrics.log
@@ -1,41 +1,41 @@
-V1205 21:34:04.973000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c30794dff8d2e5e3ffb104aa42f0079e"}
+V1206 15:19:20.597000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5c750e81de6af580aac943559bb2808d"}
 	{
 	"name": "dynamo",
-	"ts": 1733463244973329.2,
+	"ts": 1733527160597711.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:04.974000 1556615 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
-V1205 21:34:04.974000 1556615 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", 1]}
-V1205 21:34:04.974000 1556615 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 2]}
-V1205 21:34:04.975000 1556615 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 3]}
-V1205 21:34:04.975000 1556615 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 4]}
-V1205 21:34:04.975000 1556615 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 5]}
-V1205 21:34:04.975000 1556615 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 6]}
-V1205 21:34:04.975000 1556615 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 7]}
-V1205 21:34:04.976000 1556615 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10725, "name": "test_graph_break_compilation_metrics", "filename": 1}, {"line": 10715, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:04.976000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2b08dd650ffe480a0f6faae935eb65bd"}
+V1206 15:19:20.598000 1524181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V1206 15:19:20.598000 1524181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", 1]}
+V1206 15:19:20.598000 1524181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 2]}
+V1206 15:19:20.598000 1524181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 3]}
+V1206 15:19:20.598000 1524181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 4]}
+V1206 15:19:20.599000 1524181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 5]}
+V1206 15:19:20.599000 1524181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 6]}
+V1206 15:19:20.599000 1524181 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 7]}
+V1206 15:19:20.599000 1524181 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10725, "name": "test_graph_break_compilation_metrics", "filename": 1}, {"line": 10715, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.599000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8f9708327d55017fc86f9f639895200a"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463244976347.8,
+	"ts": 1733527160599465.0,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:04.981000 1556615 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:04.982000 1556615 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f750d3cf8f0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:04.982000 1556615 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.523000 1556615 torch/_dynamo/symbolic_convert.py:417] {"artifact": {"name": "dynamo_graph_break_reason", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f04f316548f8d8ecdbbfa0a2b4ea163a"}
+V1206 15:19:20.602000 1524181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.602000 1524181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7faa117385f0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.602000 1524181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.753000 1524181 torch/_dynamo/symbolic_convert.py:417] {"artifact": {"name": "dynamo_graph_break_reason", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f04f316548f8d8ecdbbfa0a2b4ea163a"}
 	Graph break in user code at /data/users/xmfan/a/pytorch/test/dynamo/test_misc.py:10717
 	Reason: Unsupported: 'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'
 	User code traceback:
@@ -59,10 +59,10 @@ V1205 21:34:05.523000 1556615 torch/_dynamo/symbolic_convert.py:417] {"artifact"
 	    raise Unsupported(msg, case_name=case_name)
 	torch._dynamo.exc.Unsupported: 'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'
 	
-V1205 21:34:05.526000 1556615 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 1, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.526000 1556615 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f750d3cf8f0>", "describer_id": 1}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.526000 1556615 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 1, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.534000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "cos": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "5ad90272506ac2125309cdad5611f41d"}
+V1206 15:19:20.754000 1524181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 1, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.754000 1524181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7faa117385f0>", "describer_id": 1}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.755000 1524181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 1, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.759000 1524181 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "cos": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "5ad90272506ac2125309cdad5611f41d"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[4, 4][4, 1]cpu"):
 	        l_x_ = L_x_
@@ -71,33 +71,33 @@ V1205 21:34:05.534000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	        cos: "f32[4, 4][4, 1]cpu" = l_x_.cos();  l_x_ = cos = None
 	        return ()
 	        
-V1205 21:34:05.535000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "ad5ce49a4bba3ac72fbf24799cf53d1d"}
+V1206 15:19:20.759000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "ed79400d0a0b18a3ece9bc669a889690"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463245535257.5,
+	"ts": 1733527160759353.8,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.535000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "281996a6e4a27dcfadbbc651f972f17c"}
+V1206 15:19:20.759000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "9345eaf1b39ecb3f8553a4d6e582465c"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463245535875.2,
+	"ts": 1733527160759676.0,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.543000 1556615 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "5dd4f6c62ab942940baeb571a2156b38"}
+V1206 15:19:20.763000 1524181 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "8ecd19d777f032e1f5d8a99cb3061a05"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -110,32 +110,32 @@ V1205 21:34:05.543000 1556615 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- GuardManager: source=L['x'].cos, accessed_by=GetAttrGuardAccessor(cos)
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
 	| | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor('torch')
-	| | | +- ID_MATCH: ___check_obj_id(G['torch'], 140142959899360)                
+	| | | +- ID_MATCH: ___check_obj_id(G['torch'], 140385151530416)                
 	| | | +- GuardManager: source=G['torch']._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
-	| | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo, 140142898023696)        
+	| | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo, 140382452240752)        
 	| | | | +- GuardManager: source=G['torch']._dynamo.graph_break, accessed_by=GetAttrGuardAccessor(graph_break)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo.graph_break, 140141631665344)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo.graph_break, 140380257582464)
 	
-V1205 21:34:05.543000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "2ac3ed4de6267f02a1cf6edd1d3ee64e"}
+V1206 15:19:20.763000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "d7b25cdb6c7d92283cf652c89fda37e4"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463245543487.5,
+	"ts": 1733527160763637.5,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.548000 1556615 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 617, "dynamo_cumulative_compile_time_us": 567139, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10715, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 10, "shape_env_guard_count": 0, "graph_op_count": 1, "graph_node_count": 3, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.5473113059997559, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 547311}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.548000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "d3f5f4089bc2e836084460f069f71f5f"}
+V1206 15:19:20.766000 1524181 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 322, "dynamo_cumulative_compile_time_us": 164172, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10715, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 10, "shape_env_guard_count": 0, "graph_op_count": 1, "graph_node_count": 3, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.15378284454345703, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 153782}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.766000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 1, "has_payload": "f8061d1a8884764e00182e2ff0f5b39e"}
 	{
 	"name": "dynamo",
-	"ts": 1733463245548837.2,
+	"ts": 1733527160766526.5,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "fn",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py",
@@ -156,46 +156,46 @@ V1205 21:34:05.548000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [
 	"'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'"
 	],
-	"dynamo_time_before_restart_s": 0.5473113059997559,
+	"dynamo_time_before_restart_s": 0.15378284454345703,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.550000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d36ff4ba061c0ee22fcfcb8ea2801fb4"}
+V1206 15:19:20.768000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4b467587b7113a6f7f359892684dcffe"}
 	{
 	"name": "dynamo",
-	"ts": 1733463245550244.5,
+	"ts": 1733527160768014.8,
 	"args": {
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.550000 1556615 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/eval_frame.py", 8]}
-V1205 21:34:05.551000 1556615 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10725, "name": "test_graph_break_compilation_metrics", "filename": 1}, {"line": 573, "name": "_fn", "filename": 8}, {"line": 10717, "name": "torch_dynamo_resume_in_fn_at_10717", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.551000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "cfd693e90509842b83064ca9279cb42a"}
+V1206 15:19:20.768000 1524181 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/eval_frame.py", 8]}
+V1206 15:19:20.768000 1524181 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10725, "name": "test_graph_break_compilation_metrics", "filename": 1}, {"line": 573, "name": "_fn", "filename": 8}, {"line": 10717, "name": "torch_dynamo_resume_in_fn_at_10717", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.768000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b037746539aa49bc911ce7e825e4910f"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463245551404.0,
+	"ts": 1733527160768607.8,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.553000 1556615 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 3, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.553000 1556615 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f750d3cf8f0>", "describer_id": 3}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.553000 1556615 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 3, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.557000 1556615 torch/_dynamo/symbolic_convert.py:417] {"artifact": {"name": "dynamo_graph_break_reason", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3be5341dae8897d4fd2d7e55a644cdb1"}
+V1206 15:19:20.769000 1524181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 3, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.769000 1524181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7faa117385f0>", "describer_id": 3}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.769000 1524181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 3, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.771000 1524181 torch/_dynamo/symbolic_convert.py:417] {"artifact": {"name": "dynamo_graph_break_reason", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3be5341dae8897d4fd2d7e55a644cdb1"}
 	Graph break in user code at /data/users/xmfan/a/pytorch/test/dynamo/test_misc.py:10719
 	Reason: Unsupported: 'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'
 	User code traceback:
@@ -219,10 +219,10 @@ V1205 21:34:05.557000 1556615 torch/_dynamo/symbolic_convert.py:417] {"artifact"
 	    raise Unsupported(msg, case_name=case_name)
 	torch._dynamo.exc.Unsupported: 'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'
 	
-V1205 21:34:05.559000 1556615 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 4, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.559000 1556615 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f750d3cf8f0>", "describer_id": 4}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.560000 1556615 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 4, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.563000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "fed8fe8097989f418398de0fb8e36aff"}
+V1206 15:19:20.772000 1524181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 4, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.773000 1524181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7faa117385f0>", "describer_id": 4}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.773000 1524181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 4, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.775000 1524181 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "fed8fe8097989f418398de0fb8e36aff"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[4, 4][4, 1]cpu"):
 	        l_x_ = L_x_
@@ -231,33 +231,33 @@ V1205 21:34:05.563000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	        sin: "f32[4, 4][4, 1]cpu" = l_x_.sin();  l_x_ = sin = None
 	        return ()
 	        
-V1205 21:34:05.564000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "0d5e8fe1aa871cb254db9e3e15d917e6"}
+V1206 15:19:20.775000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "03bf1bc5843e0bf697685810fb375643"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463245564065.0,
+	"ts": 1733527160775399.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.564000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "eefbe17cf610111217c1c1e52f0b6f95"}
+V1206 15:19:20.775000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "feab32619774ebe320ba3da5d514a982"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463245564476.5,
+	"ts": 1733527160775618.0,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.571000 1556615 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "220dc16e6475b9e6ef7e4fa811dd7e15"}
+V1206 15:19:20.779000 1524181 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "3aa22d5919e29b46fc9b9765b464e017"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -270,32 +270,32 @@ V1205 21:34:05.571000 1556615 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- GuardManager: source=L['x'].sin, accessed_by=GetAttrGuardAccessor(sin)
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
 	| | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor('torch')
-	| | | +- ID_MATCH: ___check_obj_id(G['torch'], 140142959899360)                
+	| | | +- ID_MATCH: ___check_obj_id(G['torch'], 140385151530416)                
 	| | | +- GuardManager: source=G['torch']._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
-	| | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo, 140142898023696)        
+	| | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo, 140382452240752)        
 	| | | | +- GuardManager: source=G['torch']._dynamo.graph_break, accessed_by=GetAttrGuardAccessor(graph_break)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo.graph_break, 140141631665344)
+	| | | | | +- ID_MATCH: ___check_obj_id(G['torch']._dynamo.graph_break, 140380257582464)
 	
-V1205 21:34:05.571000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "ae817b8f7943c6537b9e8c951b7b45bb"}
+V1206 15:19:20.779000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "b0a86d4ae27b88fbb56b48ad670346a8"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463245571897.5,
+	"ts": 1733527160779562.8,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/1/0"
+	"compile_id": "1/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.576000 1556615 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 411, "dynamo_cumulative_compile_time_us": 20493, "frame_key": "2", "co_name": "torch_dynamo_resume_in_fn_at_10717", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10717, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 10, "shape_env_guard_count": 0, "graph_op_count": 1, "graph_node_count": 3, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.006140470504760742, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 6140}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
-V1205 21:34:05.576000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "da69fa561d809b54f17aa05cbc2240bf"}
+V1206 15:19:20.782000 1524181 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 218, "dynamo_cumulative_compile_time_us": 10954, "frame_key": "2", "co_name": "torch_dynamo_resume_in_fn_at_10717", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10717, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 10, "shape_env_guard_count": 0, "graph_op_count": 1, "graph_node_count": 3, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": ["'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'"], "dynamo_time_before_restart_s": 0.0033178329467773438, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 3317}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1}
+V1206 15:19:20.782000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 1, "has_payload": "6c9b505889778bead7f6675d888c1206"}
 	{
 	"name": "dynamo",
-	"ts": 1733463245576561.5,
+	"ts": 1733527160782270.0,
 	"args": {
-	"compile_id": "-/1/0",
+	"compile_id": "1/0",
 	"frame_key": "2",
 	"co_name": "torch_dynamo_resume_in_fn_at_10717",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py",
@@ -316,45 +316,45 @@ V1205 21:34:05.576000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [
 	"'skip function graph_break in file /data/users/xmfan/a/pytorch/torch/_dynamo/decorators.py'"
 	],
-	"dynamo_time_before_restart_s": 0.006140470504760742,
+	"dynamo_time_before_restart_s": 0.0033178329467773438,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.577000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "3c2b2c1c6fa65d68c01be441674a77b0"}
+V1206 15:19:20.782000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "bb4d2567ed51b41c4b730c4b5e7a18a6"}
 	{
 	"name": "dynamo",
-	"ts": 1733463245577274.0,
+	"ts": 1733527160782667.8,
 	"args": {
-	"compile_id": "-/2/0"
+	"compile_id": "2/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.577000 1556615 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10725, "name": "test_graph_break_compilation_metrics", "filename": 1}, {"line": 573, "name": "_fn", "filename": 8}, {"line": 10715, "name": "fn", "filename": 1}, {"line": 10719, "name": "torch_dynamo_resume_in_fn_at_10719", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.578000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "951bfbfb166cfa89c975717153ba7e85"}
+V1206 15:19:20.783000 1524181 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10725, "name": "test_graph_break_compilation_metrics", "filename": 1}, {"line": 573, "name": "_fn", "filename": 8}, {"line": 10715, "name": "fn", "filename": 1}, {"line": 10719, "name": "torch_dynamo_resume_in_fn_at_10719", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.783000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "39b2e5ddae6d3d44871627453d2f4cd5"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463245578147.0,
+	"ts": 1733527160783133.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/2/0"
+	"compile_id": "2/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.579000 1556615 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 6, "size": 64}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.580000 1556615 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f750d3cf8f0>", "describer_id": 6}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.580000 1556615 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 6, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.583000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "cos": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "fde42e3550a073ebc24c06419319a4cb"}
+V1206 15:19:20.784000 1524181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 6, "size": 64}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.784000 1524181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7faa117385f0>", "describer_id": 6}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.784000 1524181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 6, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.785000 1524181 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "cos": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "fde42e3550a073ebc24c06419319a4cb"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[4, 4][4, 1]cpu"):
 	        l_x_ = L_x_
@@ -363,33 +363,33 @@ V1205 21:34:05.583000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	        cos: "f32[4, 4][4, 1]cpu" = l_x_.cos();  l_x_ = None
 	        return (cos,)
 	        
-V1205 21:34:05.583000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "7ced6acf6de2a8fa28be17042dc7b1ec"}
+V1206 15:19:20.786000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "a5a9a5c382c5b5e7f0e04a511b1fb0ad"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463245583850.2,
+	"ts": 1733527160786318.0,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/2/0"
+	"compile_id": "2/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.584000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "c61131b8d775fd55bb955844aadb1433"}
+V1206 15:19:20.786000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "afb3c76c9a540a3d948e293085b45232"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463245584239.8,
+	"ts": 1733527160786532.2,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/2/0"
+	"compile_id": "2/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.586000 1556615 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "c5544972e90daf8e28289162d4be0b2b"}
+V1206 15:19:20.787000 1524181 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "c5544972e90daf8e28289162d4be0b2b"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -401,26 +401,26 @@ V1205 21:34:05.586000 1556615 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- NO_HASATTR: hasattr(L['x'], '_dynamo_dynamic_indices') == False         
 	| | +- GuardManager: source=L['x'].cos, accessed_by=GetAttrGuardAccessor(cos)
 	
-V1205 21:34:05.587000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "10675898b7337b976e2edb5254ef378f"}
+V1206 15:19:20.788000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "5f1d06d042031cff969b5958207bd92d"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463245587106.2,
+	"ts": 1733527160788106.5,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/2/0"
+	"compile_id": "2/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:05.591000 1556615 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 389, "dynamo_cumulative_compile_time_us": 8959, "frame_key": "3", "co_name": "torch_dynamo_resume_in_fn_at_10719", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10719, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 7, "shape_env_guard_count": 0, "graph_op_count": 1, "graph_node_count": 3, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:05.591000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "b84174e8de18120536486832288b94e5"}
+V1206 15:19:20.790000 1524181 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 214, "dynamo_cumulative_compile_time_us": 4973, "frame_key": "3", "co_name": "torch_dynamo_resume_in_fn_at_10719", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10719, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 7, "shape_env_guard_count": 0, "graph_op_count": 1, "graph_node_count": 3, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.790000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 2, "frame_compile_id": 0, "attempt": 0, "has_payload": "95d1d0e5bdf1c32eb5fa5941788bb85f"}
 	{
 	"name": "dynamo",
-	"ts": 1733463245591709.0,
+	"ts": 1733527160790764.0,
 	"args": {
-	"compile_id": "-/2/0",
+	"compile_id": "2/0",
 	"frame_key": "3",
 	"co_name": "torch_dynamo_resume_in_fn_at_10719",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py",
@@ -441,43 +441,43 @@ V1205 21:34:05.591000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:07.712000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "64af8e0633e113cc51f5219e4e76c092"}
+V1206 15:19:20.979000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9daa2219e584575027e227930ae710f3"}
 	{
 	"name": "dynamo",
-	"ts": 1733463247711890.8,
+	"ts": 1733527160979577.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:07.712000 1556615 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10765, "name": "test_graph_break_compilation_metrics_on_failure", "filename": 1}, {"line": 10755, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:07.713000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "27e926f8091d3c79456f0ac950bde049"}
+V1206 15:19:20.980000 1524181 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 12143, "name": "<module>", "filename": 1}, {"line": 39, "name": "run_tests", "filename": 2}, {"line": 1353, "name": "run_tests", "filename": 3}, {"line": 102, "name": "__init__", "filename": 4}, {"line": 274, "name": "runTests", "filename": 4}, {"line": 217, "name": "run", "filename": 5}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 6}, {"line": 122, "name": "run", "filename": 6}, {"line": 678, "name": "__call__", "filename": 7}, {"line": 3234, "name": "run", "filename": 3}, {"line": 3206, "name": "_run_custom", "filename": 3}, {"line": 623, "name": "run", "filename": 7}, {"line": 579, "name": "_callTestMethod", "filename": 7}, {"line": 3099, "name": "wrapper", "filename": 3}, {"line": 10765, "name": "test_graph_break_compilation_metrics_on_failure", "filename": 1}, {"line": 10755, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.980000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b19804eb17a0afa33f8303cc2f33751f"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463247713022.2,
+	"ts": 1733527160980233.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:07.715000 1556615 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 8, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:07.715000 1556615 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f7504f9c710>", "describer_id": 8}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:07.715000 1556615 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 8, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:07.720000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "289c7de0221d9f71a5513269eb25c1a2"}
+V1206 15:19:20.981000 1524181 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 8, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.981000 1524181 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 2, "dtype": "torch.float32", "device": "device(type='cpu')", "size": [4, 4], "is_leaf": true, "stride": [4, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7faa113e8830>", "describer_id": 8}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.981000 1524181 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 8, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.983000 1524181 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [4, 4], "sin": [4, 4]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "289c7de0221d9f71a5513269eb25c1a2"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[4, 4][4, 1]cpu"):
 	        l_x_ = L_x_
@@ -486,46 +486,46 @@ V1205 21:34:07.720000 1556615 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	        sin: "f32[4, 4][4, 1]cpu" = l_x_.sin();  l_x_ = None
 	        return (sin,)
 	        
-V1205 21:34:07.720000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f0ed19bee2d03bb6faa7ff5eafbc6838"}
+V1206 15:19:20.984000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b66a10af2172713be017fe217480e433"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463247720867.8,
+	"ts": 1733527160984193.0,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:07.721000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1ad935d71451a056e8e66b459e76ae83"}
+V1206 15:19:20.984000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4fbd69f2113763626d452ced55f6934e"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463247721325.5,
+	"ts": 1733527160984428.0,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:07.721000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "397f1eb49d1c5ab22137cef73175815e"}
+V1206 15:19:20.984000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d5551403b2946d84819009efa6f04131"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463247721882.0,
+	"ts": 1733527160984732.5,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:34:07.723000 1556615 torch/_dynamo/convert_frame.py:1011] {"artifact": {"name": "dynamo_error", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8240b625682b3ef80eddf3423a91511c"}
+V1206 15:19:20.985000 1524181 torch/_dynamo/convert_frame.py:1011] {"artifact": {"name": "dynamo_error", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8240b625682b3ef80eddf3423a91511c"}
 	Traceback (most recent call last):
 	  File "/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", line 989, in _compile
 	    guarded_code = compile_inner(code, one_graph, hooks, transform)
@@ -584,13 +584,13 @@ V1205 21:34:07.723000 1556615 torch/_dynamo/convert_frame.py:1011] {"artifact":
 	Set TORCH_LOGS="+dynamo" and TORCHDYNAMO_VERBOSE=1 for more information
 	
 	
-V1205 21:34:07.728000 1556615 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 457, "dynamo_cumulative_compile_time_us": 8859, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10755, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "fail_type": "BackendCompilerFailed", "fail_reason": "backend='broken_backend' raised:\nRuntimeError: broken backend", "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.01111214, "has_guarded_code": false, "config_suppress_errors": true, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 11112}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:34:07.728000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "28dd629a11f6ff13dd4bec9e4e19f410"}
+V1206 15:19:20.988000 1524181 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 234, "dynamo_cumulative_compile_time_us": 4499, "frame_key": "1", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py", "co_firstlineno": 10755, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": null, "shape_env_guard_count": null, "graph_op_count": null, "graph_node_count": null, "graph_input_count": null, "fail_type": "BackendCompilerFailed", "fail_reason": "backend='broken_backend' raised:\nRuntimeError: broken backend", "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.005744147, "has_guarded_code": false, "config_suppress_errors": true, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 5744}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:19:20.988000 1524181 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7845c97ccc7681a0ddb92dc33ca4ba2a"}
 	{
 	"name": "dynamo",
-	"ts": 1733463247728613.0,
+	"ts": 1733527160988554.2,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "fn",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/dynamo/test_misc.py",
@@ -609,9 +609,9 @@ V1205 21:34:07.728000 1556615 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"non_compliant_ops": [],
 	"compliant_custom_ops": [],
 	"restart_reasons": [],
-	"dynamo_time_before_restart_s": 0.01111214,
+	"dynamo_time_before_restart_s": 0.005744147,
 	"has_guarded_code": false,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._refs\", \"torch._prims\", \"torch.distributions\", \"torch.testing\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": true, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch._decomp\", \"torch.testing\", \"torch.distributions\", \"torch._prims\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
diff --git a/tests/inputs/simple.log b/tests/inputs/simple.log
index e3497d3..fcf4638 100644
--- a/tests/inputs/simple.log
+++ b/tests/inputs/simple.log
@@ -1,7 +1,7 @@
-V1205 21:30:31.365000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "d47977f4a430b21be6abbb1fbf91f88e"}
+V1206 15:18:15.925000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "490a3a38ce10390374e403dd6e063c7f"}
 	{
 	"name": "compile_file",
-	"ts": 1733463031365424.5,
+	"ts": 1733527095925179.5,
 	"args": {
 	"compile_id": "None"
 	},
@@ -10,10 +10,10 @@ V1205 21:30:31.365000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:32.963000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "4425406869a5a211e2625e162bfad532"}
+V1206 15:18:16.845000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "6762d47fdbf80071626529f25dc69013"}
 	{
 	"name": "compile_file",
-	"ts": 1733463032962983.8,
+	"ts": 1733527096845517.0,
 	"args": {
 	"compile_id": "None"
 	},
@@ -22,10 +22,10 @@ V1205 21:30:32.963000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:34.912000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "d5d927457f64f150c1694d7ef554e15f"}
+V1206 15:18:17.965000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "acbf9ac4749cce84db0b5b9acd4b26e5"}
 	{
 	"name": "compile_file",
-	"ts": 1733463034912072.2,
+	"ts": 1733527097965329.8,
 	"args": {
 	"compile_id": "None"
 	},
@@ -34,10 +34,10 @@ V1205 21:30:34.912000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:36.583000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "9a677016a741b97e5f6a0d06c215e494"}
+V1206 15:18:18.815000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "6050df090b76feb2a53d6cc2fdb23908"}
 	{
 	"name": "compile_file",
-	"ts": 1733463036582869.5,
+	"ts": 1733527098815455.0,
 	"args": {
 	"compile_id": "None"
 	},
@@ -46,48 +46,72 @@ V1205 21:30:36.583000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:38.620000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d6889d0f3ff8b01cd12c30a4b41396ee"}
+V1206 15:18:19.883000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "f35f8d0fcca57799f1a731e665c95987"}
+	{
+	"name": "compile_file",
+	"ts": 1733527099883504.5,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "B",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V1206 15:18:19.999000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "has_payload": "01af5c3df1e6a6aa1747dab60c80c0b9"}
+	{
+	"name": "compile_file",
+	"ts": 1733527099999552.0,
+	"args": {
+	"compile_id": "None"
+	},
+	"ph": "E",
+	"cat": "dynamo_timed",
+	"tid": 0,
+	"pid": 0
+	}
+V1206 15:18:20.252000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2edd807b9a63e3b4c6d50b6297bb4677"}
 	{
 	"name": "dynamo",
-	"ts": 1733463038619928.0,
+	"ts": 1733527100252271.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:38.621000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
-V1205 21:30:38.621000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/inductor/test_torchinductor.py", 1]}
-V1205 21:30:38.621000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_inductor/test_case.py", 2]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 3]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 4]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 5]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 6]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 7]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 8]}
-V1205 21:30:38.622000 1509703 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/contextlib.py", 9]}
-V1205 21:30:38.623000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/nn/modules/module.py", 10]}
-V1205 21:30:38.623000 1509703 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/eval_frame.py", 11]}
-V1205 21:30:38.623000 1509703 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 13037, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 11906, "name": "new_test", "filename": 1}, {"line": 81, "name": "inner", "filename": 9}, {"line": 11160, "name": "test_custom_op_fixed_layout_channels_last", "filename": 1}, {"line": 1739, "name": "_wrapped_call_impl", "filename": 10}, {"line": 1750, "name": "_call_impl", "filename": 10}, {"line": 573, "name": "_fn", "filename": 11}, {"line": 1739, "name": "_wrapped_call_impl", "filename": 10}, {"line": 11123, "name": "forward", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:30:38.623000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "791657b1ab1f4a0e0c223c5ccd6ecabd"}
+V1206 15:18:20.253000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
+V1206 15:18:20.253000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/inductor/test_torchinductor.py", 1]}
+V1206 15:18:20.253000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_inductor/test_case.py", 2]}
+V1206 15:18:20.253000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 3]}
+V1206 15:18:20.253000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 4]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 5]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 6]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 7]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 8]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/contextlib.py", 9]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/nn/modules/module.py", 10]}
+V1206 15:18:20.254000 1500233 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/eval_frame.py", 11]}
+V1206 15:18:20.254000 1500233 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 13037, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 11906, "name": "new_test", "filename": 1}, {"line": 81, "name": "inner", "filename": 9}, {"line": 11160, "name": "test_custom_op_fixed_layout_channels_last", "filename": 1}, {"line": 1739, "name": "_wrapped_call_impl", "filename": 10}, {"line": 1750, "name": "_call_impl", "filename": 10}, {"line": 573, "name": "_fn", "filename": 11}, {"line": 1739, "name": "_wrapped_call_impl", "filename": 10}, {"line": 11123, "name": "forward", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:18:20.254000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0a2ce00ecd7d0ef78918976e3561193a"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463038623734.0,
+	"ts": 1733527100254653.2,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:39.083000 1509703 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 20971520}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:30:39.083000 1509703 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 320, 128, 128], "is_leaf": true, "stride": [5242880, 1, 40960, 320], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f896b1159d0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:30:39.084000 1509703 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:30:39.122000 1509703 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [1, 320, 128, 128], "out": [1, 320, 128, 128], "input_1": [1, 320, 128, 128], "out_1": [1, 320, 128, 128]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "43df19c7545f412eca248f900fce6ce5"}
+V1206 15:18:20.575000 1500233 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 20971520}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:18:20.575000 1500233 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 320, 128, 128], "is_leaf": true, "stride": [5242880, 1, 40960, 320], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f1309f8fe30>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:18:20.576000 1500233 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['x']"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:18:20.599000 1500233 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_x_": [1, 320, 128, 128], "out": [1, 320, 128, 128], "input_1": [1, 320, 128, 128], "out_1": [1, 320, 128, 128]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "43df19c7545f412eca248f900fce6ce5"}
 	class GraphModule(torch.nn.Module):
 	    def forward(self, L_x_: "f32[1, 320, 128, 128][5242880, 1, 40960, 320]cuda:0"):
 	        l_x_ = L_x_
@@ -102,80 +126,80 @@ V1205 21:30:39.122000 1509703 torch/_dynamo/output_graph.py:1336] {"dynamo_outpu
 	        out_1: "f32[1, 320, 128, 128][5242880, 1, 40960, 320]cuda:0" = torch.ops.test.baz(input_1);  input_1 = None
 	        return (out_1,)
 	        
-V1205 21:30:39.123000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1d53de8515ee05f08d21caf8248c16fb"}
+V1206 15:18:20.600000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b02af7e6b7baf7b21e5166764db0a66f"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463039123148.2,
+	"ts": 1733527100600090.5,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:39.123000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "cc26f85f890d932029195e3660facd08"}
+V1206 15:18:20.600000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "632df8822c4c5df2079dfd71f5827dc0"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463039123600.5,
+	"ts": 1733527100600377.2,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:39.141000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1c23772f9224e3f528e674176bc926c6"}
+V1206 15:18:20.612000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ca25b04b9513de1a0144b92d0d669023"}
 	{
 	"name": "_recursive_pre_grad_passes",
-	"ts": 1733463039141383.8,
+	"ts": 1733527100612301.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:39.143000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c07fb56f2c28d377129422220d94dd12"}
+V1206 15:18:20.613000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ed067c57dd8cd9be0b9084c7c54c3c2e"}
 	{
 	"name": "inductor_codecache_torch_key",
-	"ts": 1733463039143121.2,
+	"ts": 1733527100613547.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:39.344000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e4a2fdb02cd70b592eeea3cd37e7b5d1"}
+V1206 15:18:20.687000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "80c4f441db0b0d76a817bcb027333d90"}
 	{
 	"name": "inductor_codecache_torch_key",
-	"ts": 1733463039344720.2,
+	"ts": 1733527100687165.2,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.063000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ab142e30a375b39e0c50f26db7f82273"}
+V1206 15:18:21.086000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2ae002c6629696bbf0b79d8f1b7e5969"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463041063258.5,
+	"ts": 1733527101086489.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.095000 1509703 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "619a91cc6a2a9ea21ce0754dd5524cda"}
+V1206 15:18:21.105000 1500233 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "619a91cc6a2a9ea21ce0754dd5524cda"}
 	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
 	                                              mutates_data=False,
 	                                              mutates_metadata=False,
@@ -209,7 +233,7 @@ V1205 21:30:41.095000 1509703 torch/_functorch/_aot_autograd/dispatch_and_compil
 	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
 	                    bw_donated_idxs=None,
 	                    num_backward_tokens=0)
-V1205 21:30:41.097000 1509703 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9217735d824ab4b7c7cb21827ecb4673"}
+V1206 15:18:21.106000 1500233 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9217735d824ab4b7c7cb21827ecb4673"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 320, 128, 128][5242880, 1, 40960, 320]cuda:0"):
 	         # File: /data/users/xmfan/a/pytorch/test/inductor/test_torchinductor.py:11119 in helper, code: out = F.gelu(x)
@@ -231,56 +255,56 @@ V1205 21:30:41.097000 1509703 torch/_functorch/_aot_autograd/dispatch_and_compil
 	        baz: "f32[1, 320, 128, 128][5242880, 1, 40960, 320]cuda:0" = torch.ops.test.baz.default(mul_4);  mul_4 = None
 	        return (baz,)
 	        
-V1205 21:30:41.098000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5546c50945a9e736de64df153a2e55f3"}
+V1206 15:18:21.106000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "404b42df98884a4e0973f79434379f9b"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463041098321.5,
+	"ts": 1733527101106855.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.098000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "69c43b520b9077cd4702743933a32f23"}
+V1206 15:18:21.107000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ef2aa74274539117c0be992171939d5b"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463041098895.8,
+	"ts": 1733527101107196.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.260000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "40d624266dc43c9247cdb7d4f1ba7ef0"}
+V1206 15:18:21.196000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a6e306d9d22bfce0b4cbd732937552d9"}
 	{
 	"name": "_recursive_joint_graph_passes",
-	"ts": 1733463041260431.0,
+	"ts": 1733527101196659.2,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.261000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "267d06b5b3058c8081f71dee7c38b39d"}
+V1206 15:18:21.197000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6c62c7baf7e22c49027f929729dec376"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463041260961.5,
+	"ts": 1733527101197129.0,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.274000 1509703 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "cd5ba9887d4066a3fcfc130fa7262f7d"}
+V1206 15:18:21.206000 1500233 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2470f3342eae7074d57bd8d2177ff99c"}
 	
 	import torch
 	from torch import tensor, device
@@ -297,7 +321,7 @@ V1205 21:30:41.274000 1509703 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	torch._dynamo.config.suppress_errors = False
 	torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
 	torch._dynamo.config.raise_on_ctx_manager_usage = True
-	torch._dynamo.config._save_config_ignore = {'repro_level', 'constant_functions', 'repro_after', 'skipfiles_inline_module_allowlist'}
+	torch._dynamo.config._save_config_ignore = {'constant_functions', 'skipfiles_inline_module_allowlist', 'repro_after', 'repro_level'}
 	torch._dynamo.config.log_compilation_metrics = False
 	torch._dynamo.config.reorderable_logging_functions = set()
 	torch._inductor.config.debug = True
@@ -331,20 +355,20 @@ V1205 21:30:41.274000 1509703 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	
 	
 	
-	# torch version: 2.6.0a0+git5f4afda
-	# torch cuda version: 12.1
-	# torch git version: 5f4afda82a5a7a708effa35379140b88511b1f5f
+	# torch version: 2.6.0a0+giteece9ec
+	# torch cuda version: 12.2
+	# torch git version: eece9ecd62cae84bc2f915fc48cffe43e30256aa
 	
 	
 	# CUDA Info: 
 	# nvcc: NVIDIA (R) Cuda compiler driver 
 	# Copyright (c) 2005-2023 NVIDIA Corporation 
-	# Built on Mon_Apr__3_17:16:06_PDT_2023 
-	# Cuda compilation tools, release 12.1, V12.1.105 
-	# Build cuda_12.1.r12.1/compiler.32688072_0 
+	# Built on Tue_Aug_15_22:02:13_PDT_2023 
+	# Cuda compilation tools, release 12.2, V12.2.140 
+	# Build cuda_12.2.r12.2/compiler.33191640_0 
 	
 	# GPU Hardware Info: 
-	# NVIDIA PG509-210 : 8 
+	# NVIDIA H100 : 8 
 	
 	
 	from torch.nn import *
@@ -382,31 +406,31 @@ V1205 21:30:41.274000 1509703 torch/_inductor/compile_fx.py:835] {"artifact": {"
 	        # To run it separately, do 
 	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
 	        # mod(*args)
-V1205 21:30:41.282000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9d6c37bb3008aeb7e6c7b80f0bdff995"}
+V1206 15:18:21.212000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5c03608cdc5fd6af674af56b678a82ad"}
 	{
 	"name": "_recursive_post_grad_passes",
-	"ts": 1733463041282638.5,
+	"ts": 1733527101212025.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.309000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f58e503bf8d00156ac79b4ba5cd79005"}
+V1206 15:18:21.229000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "02fcd8096aaa1296b71ae553fbca81c1"}
 	{
 	"name": "_recursive_post_grad_passes",
-	"ts": 1733463041309595.5,
+	"ts": 1733527101229240.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.311000 1509703 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b1c8a699812350437e2781e8ae0cdd78"}
+V1206 15:18:21.230000 1500233 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b1c8a699812350437e2781e8ae0cdd78"}
 	class <lambda>(torch.nn.Module):
 	    def forward(self, arg0_1: "f32[1, 320, 128, 128][5242880, 1, 40960, 320]cuda:0"):
 	        # No stacktrace found for following nodes
@@ -433,176 +457,176 @@ V1205 21:30:41.311000 1509703 torch/_inductor/compile_fx.py:898] {"inductor_post
 	        baz: "f32[1, 320, 128, 128][5242880, 1, 40960, 320]cuda:0" = torch.ops.test.baz.default(mul_4);  mul_4 = None
 	        return (baz,)
 	        
-V1205 21:30:41.331000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "df7b827a8cc3c8c035d24a36f1882d46"}
+V1206 15:18:21.247000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "cd84113519eaf110be2573a6d9ab7d5c"}
 	{
 	"name": "GraphLowering.run",
-	"ts": 1733463041331278.8,
+	"ts": 1733527101247266.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.389000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a18f3645499a6ee7542aaa7cac0c753d"}
+V1206 15:18:21.284000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8c60f183722282266a91df7c4e6a649f"}
 	{
 	"name": "GraphLowering.run",
-	"ts": 1733463041389151.5,
+	"ts": 1733527101284562.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.389000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "362a01bd9dbce0f522eb1cd663ba49cd"}
+V1206 15:18:21.284000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "170214c32ec5bf03937f621741046dcd"}
 	{
 	"name": "GraphLowering.compile_to_fn",
-	"ts": 1733463041389608.0,
+	"ts": 1733527101284939.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.390000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2660865b70d08c40985542700e05d33e"}
+V1206 15:18:21.285000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2224b3ed7a31123f4b48c477e08d0cd1"}
 	{
 	"name": "code_gen",
-	"ts": 1733463041389964.5,
+	"ts": 1733527101285145.2,
 	"args": {
 	"fn_name": "GraphLowering.compile_to_module",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.390000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5e87afa4676253947a9455dea5ac438c"}
+V1206 15:18:21.285000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "1f11a0e6ad50d55bb5656736d0b49cb8"}
 	{
 	"name": "GraphLowering.codegen",
-	"ts": 1733463041390290.0,
+	"ts": 1733527101285316.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.393000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7e14719868a1520bdcd167bc2da4cb02"}
+V1206 15:18:21.287000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6644f6874ee64c9e36e4f40bfd6f3d1a"}
 	{
 	"name": "Scheduler.__init__",
-	"ts": 1733463041393230.0,
+	"ts": 1733527101287676.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.420000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2a2e99188e4db1437fe4773a203f984c"}
+V1206 15:18:21.304000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bd092078f3a275d331fa39b88de2d12c"}
 	{
 	"name": "Scheduler.fused_nodes",
-	"ts": 1733463041420196.8,
+	"ts": 1733527101304485.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.421000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "92976e3edb24e6c232c863970dbff2d5"}
+V1206 15:18:21.305000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "41a7f1db6eb37c96544c9ab2fda60033"}
 	{
 	"name": "Scheduler.fused_nodes",
-	"ts": 1733463041421251.2,
+	"ts": 1733527101305930.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.425000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "537e4d540dfdcb5fb6e2f5c9a72c6bfe"}
+V1206 15:18:21.308000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4aa4bc04054bdb300303a01c86fdbf07"}
 	{
 	"name": "Scheduler.__init__",
-	"ts": 1733463041425553.8,
+	"ts": 1733527101308460.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.425000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e106fb215cefe2b9e36b3a50cf406692"}
+V1206 15:18:21.308000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "3548e38bf922ab2f500ca438d9dd5db4"}
 	{
 	"name": "Scheduler.codegen",
-	"ts": 1733463041425904.5,
+	"ts": 1733527101308690.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.730000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "de0060cd05922e5408f5189e647fe60b"}
+V1206 15:18:21.450000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "cb811b477878494e7cee5bcffb0dd0fa"}
 	{
 	"name": "Scheduler.codegen",
-	"ts": 1733463041730180.2,
+	"ts": 1733527101450232.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.730000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "419a3f0dd6732b174e4c3e28810225a2"}
+V1206 15:18:21.450000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d3cf9b88610841c93e68e258b4160410"}
 	{
 	"name": "PythonWrapperCodegen.generate",
-	"ts": 1733463041730630.8,
+	"ts": 1733527101450612.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.732000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "555591efb709b0892936bf36bcadd66e"}
+V1206 15:18:21.451000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "12933d2f0c133902de1bcf9876e27aa4"}
 	{
 	"name": "PythonWrapperCodegen.generate",
-	"ts": 1733463041732283.0,
+	"ts": 1733527101451570.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.732000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0d1b80a56f601383f3c5d3b3ad664c8e"}
+V1206 15:18:21.451000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "8661f4a66b85846d47bc578e43957bb5"}
 	{
 	"name": "GraphLowering.codegen",
-	"ts": 1733463041732695.0,
+	"ts": 1733527101451789.8,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.733000 1509703 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmpij_nlxoc/ko/ckoywtk7u7ja5djgizn66xwig44vwugmkkdcfb2xnzahmy6x7pih.py"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f5bb5f3caafa11cfde97fae0e40a7648"}
+V1206 15:18:21.452000 1500233 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmprds_hch0/yj/cyjwx6x6efpuwt4dvr4ev42v4ghac5zo2uggvscht2otwnesito6.py"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "35842d763c8905794995d6c4a089d3b3"}
 	# AOT ID: ['0_inference']
 	from ctypes import c_void_p, c_long, c_int
 	import torch
@@ -643,7 +667,7 @@ V1205 21:30:41.733000 1509703 torch/_inductor/graph.py:2030] {"inductor_output_c
 	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
 	
 	
-	# kernel path: /tmp/tmpij_nlxoc/n5/cn5q4d44n73jczky25rgbt55muvyp5nrrqizddddeh4o3ftmmksv.py
+	# kernel path: /tmp/tmprds_hch0/ke/ckedh2vjam5uo7wobyr5yq2et3clblzbzgykujgmjbmkj5uyimpl.py
 	# Topologically Sorted Source Nodes: [input_1], Original ATen: [aten.native_dropout]
 	# Source node to ATen node mapping:
 	#   input_1 => inductor_lookup_seed_default, inductor_random_default
@@ -663,8 +687,8 @@ V1205 21:30:41.733000 1509703 torch/_inductor/graph.py:2030] {"inductor_output_c
 	@triton_heuristics.pointwise(
 	    size_hints=[8388608], 
 	    filename=__file__,
-	    triton_meta={'signature': {'in_ptr0': '*i64', 'out_ptr0': '*fp32', 'load_seed_offset': 'i32', 'xnumel': 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=108, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 3), equal_to_1=())]},
-	    inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_native_dropout_0', 'mutated_arg_names': [], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': '59CDC28C5AC44AE92A1C88C87D935A8E2ADCDA7AF532B6269756431229691604', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': False, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+	    triton_meta={'signature': {'in_ptr0': '*i64', 'out_ptr0': '*fp32', 'load_seed_offset': 'i32', 'xnumel': 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 3), equal_to_1=())]},
+	    inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_native_dropout_0', 'mutated_arg_names': [], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 0, 'num_reduction': 0, 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': False, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
 	    min_elem_per_thread=0
 	)
 	@triton.jit
@@ -681,7 +705,7 @@ V1205 21:30:41.733000 1509703 torch/_inductor/graph.py:2030] {"inductor_output_c
 	''', device_str='cuda')
 	
 	
-	# kernel path: /tmp/tmpij_nlxoc/6z/c6zc3pmsngno674baz27qob2gc62epq7e42k4nnd2iktnvbisajp.py
+	# kernel path: /tmp/tmprds_hch0/eu/ceutejflq32k5wvvsucbkscrxmvjorlj2t3eq7tgcfyqrz5mhnh3.py
 	# Topologically Sorted Source Nodes: [input_1, out, out_1], Original ATen: [aten.native_dropout, aten.gelu, test.baz]
 	# Source node to ATen node mapping:
 	#   input_1 => clone, gt, mul_3, mul_4
@@ -711,8 +735,8 @@ V1205 21:30:41.733000 1509703 torch/_inductor/graph.py:2030] {"inductor_output_c
 	@triton_heuristics.pointwise(
 	    size_hints=[16384, 512], tile_hint=TileHint.DEFAULT,
 	    filename=__file__,
-	    triton_meta={'signature': {'in_ptr0': '*fp32', 'in_ptr1': '*fp32', 'out_ptr0': '*fp32', 'ynumel': 'i32', 'xnumel': 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=80, major=8, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=108, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=())]},
-	    inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_baz_gelu_native_dropout_1', 'mutated_arg_names': [], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '59CDC28C5AC44AE92A1C88C87D935A8E2ADCDA7AF532B6269756431229691604', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': False, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
+	    triton_meta={'signature': {'in_ptr0': '*fp32', 'in_ptr1': '*fp32', 'out_ptr0': '*fp32', 'ynumel': 'i32', 'xnumel': 'i32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4), equal_to_1=())]},
+	    inductor_meta={'autotune_hints': set(), 'kernel_name': 'triton_poi_fused_baz_gelu_native_dropout_1', 'mutated_arg_names': [], 'optimize_mem': True, 'no_x_dim': False, 'num_load': 2, 'num_reduction': 0, 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': True, 'autotune_pointwise': False, 'autotune_remote_cache': None, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
 	    min_elem_per_thread=0
 	)
 	@triton.jit
@@ -790,109 +814,109 @@ V1205 21:30:41.733000 1509703 torch/_inductor/graph.py:2030] {"inductor_output_c
 	    from torch._inductor.wrapper_benchmark import compiled_module_main
 	    compiled_module_main('None', benchmark_compiled_module)
 	
-V1205 21:30:41.734000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e48994f23a0a2996c1e181d49ce0007f"}
+V1206 15:18:21.452000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e0fd41277ac80fe5bd868c65d517d89b"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463041733975.8,
+	"ts": 1733527101452695.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:41.749000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "03b0e93465eab73fcba9e622ee334cd9"}
+V1206 15:18:21.461000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "006d0abc072724b3a8ab6d84182d8431"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463041749469.8,
+	"ts": 1733527101461170.2,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.339000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e69220de6fe4867b7322a514c2105de6"}
+V1206 15:18:21.813000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "50c5a1c818507a194263d8d974159dd7"}
 	{
 	"name": "async_compile.wait",
-	"ts": 1733463042339010.2,
+	"ts": 1733527101813608.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.339000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "cb0326d9ffc91250da37a425c5fdddba"}
+V1206 15:18:21.814000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2afa4a7e52f0e3b41010122154a8348f"}
 	{
 	"name": "PyCodeCache.load_by_key_path",
-	"ts": 1733463042339576.8,
+	"ts": 1733527101814085.2,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.340000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "741c804c6772d96f185f8db5679cad86"}
+V1206 15:18:21.814000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ee8f2db95d5b4f7e8cca64df4854e627"}
 	{
 	"name": "code_gen",
-	"ts": 1733463042339987.5,
+	"ts": 1733527101814319.8,
 	"args": {
 	"fn_name": "GraphLowering.compile_to_module",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.340000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c896d952252eea8cd9012dc735d47da9"}
+V1206 15:18:21.814000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "600f56ac93eb73f3fb979f2e5d61dc44"}
 	{
 	"name": "GraphLowering.compile_to_fn",
-	"ts": 1733463042340347.0,
+	"ts": 1733527101814515.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.419000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d0277385c9e0ce34607a97d1aabaedea"}
+V1206 15:18:21.900000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ff8853e249b1cd98d1a1c852aeb5bddd"}
 	{
 	"name": "TritonBundler.collect",
-	"ts": 1733463042419161.8,
+	"ts": 1733527101900813.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "B",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.420000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b4601cadc5e584bef75f20ad64749b0d"}
+V1206 15:18:21.901000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "16e2d9e68e2009468c38b0d573810760"}
 	{
 	"name": "TritonBundler.collect",
-	"ts": 1733463042420170.2,
+	"ts": 1733527101901684.5,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.422000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "71a202a25daa74965390689f0c166771"}
+V1206 15:18:21.902000 1500233 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "44378945e331d48456c66d308783acfa"}
 	{
 	"name": "fx_graph_cache_miss",
-	"ts": 1733463041262332.8,
+	"ts": 1733527101197944.0,
 	"args": {
-	"key": "fijuzlwvfbi5kcpbowj6vlob7bjmsn5mpvzya47coxveorhv3z7w",
+	"key": "ftyqixqyigk4dzwkgsofqaadk6duwj57z5bipzhn5alvsznuf2m7",
 	"components": [
 	"[pvmcgcpspyiotfrhpzur22xkrqhko5vx57txiq4wghhk7pm3ocb] gm: <lambda>()\n\n\n\ndef forward(self, arg0_1):\n    mul = torch.ops.aten.mul.Tensor(arg0_1, 0.5)\n    mul_1 = torch.ops.aten.mul.Tensor(arg0_1, 0.7071067811865476);  arg0_1 = None\n    erf = torch.ops.aten.erf.default(mul_1);  mul_1 = None\n    add = torch.ops.aten.add.Tensor(erf, 1);  erf = None\n    mul_2 = torch.ops.aten.mul.Tensor(mul, add);  mul = add = None\n    inductor_seeds_default = torch.ops.prims.inductor_seeds.default(1, device(type='cuda', index=0))\n    inductor_lookup_seed_default = torch.ops.prims.inductor_lookup_seed.default(inductor_seeds_default, 0);  inductor_seeds_default = None\n    inductor_random_default = torch.ops.prims.inductor_random.default([1, 320, 128, 128], inductor_lookup_seed_default, 'rand');  inductor_lookup_seed_default = None\n    clone = torch.ops.aten.clone.default(inductor_random_default, memory_format = torch.channels_last);  inductor_random_default = None\n    gt = torch.ops.aten.gt.Scalar(clone, 0.1);  clone = None\n    mul_3 = torch.ops.aten.mul.Tensor(gt, mul_2);  gt = mul_2 = None\n    mul_4 = torch.ops.aten.mul.Tensor(mul_3, 1.1111111111111112);  mul_3 = None\n    baz = torch.ops.test.baz.default(mul_4);  mul_4 = None\n    return (baz,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[qqiqyf6plgvsceqtyjt7qfdyz4oo3oixqvjcpsmypqbyyipyaif] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 320, 128, 128]), stride=(5242880, 1, 40960, 320), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.channels_last, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -908,10 +932,10 @@ V1205 21:30:42.422000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True",
@@ -1167,11 +1191,11 @@ V1205 21:30:42.422000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
 	],
-	"cache_event_time": 1733463041270428892,
+	"cache_event_time": 1733527101203264662,
 	"cache_state": "miss",
 	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_poi_fused_native_dropout_0', 'triton_poi_fused_baz_gelu_native_dropout_1'])",
-	"time_taken_ns": 1156794294,
-	"compile_id": "-/0/0"
+	"time_taken_ns": 702837316,
+	"compile_id": "0/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -1179,19 +1203,19 @@ V1205 21:30:42.422000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:30:42.423000 1509703 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "568a646744dff7dd5925751089ad9cad"}
-	{"key": "fijuzlwvfbi5kcpbowj6vlob7bjmsn5mpvzya47coxveorhv3z7w", "components": ["[pvmcgcpspyiotfrhpzur22xkrqhko5vx57txiq4wghhk7pm3ocb] gm: <lambda>()\n\n\n\ndef forward(self, arg0_1):\n    mul = torch.ops.aten.mul.Tensor(arg0_1, 0.5)\n    mul_1 = torch.ops.aten.mul.Tensor(arg0_1, 0.7071067811865476);  arg0_1 = None\n    erf = torch.ops.aten.erf.default(mul_1);  mul_1 = None\n    add = torch.ops.aten.add.Tensor(erf, 1);  erf = None\n    mul_2 = torch.ops.aten.mul.Tensor(mul, add);  mul = add = None\n    inductor_seeds_default = torch.ops.prims.inductor_seeds.default(1, device(type='cuda', index=0))\n    inductor_lookup_seed_default = torch.ops.prims.inductor_lookup_seed.default(inductor_seeds_default, 0);  inductor_seeds_default = None\n    inductor_random_default = torch.ops.prims.inductor_random.default([1, 320, 128, 128], inductor_lookup_seed_default, 'rand');  inductor_lookup_seed_default = None\n    clone = torch.ops.aten.clone.default(inductor_random_default, memory_format = torch.channels_last);  inductor_random_default = None\n    gt = torch.ops.aten.gt.Scalar(clone, 0.1);  clone = None\n    mul_3 = torch.ops.aten.mul.Tensor(gt, mul_2);  gt = mul_2 = None\n    mul_4 = torch.ops.aten.mul.Tensor(mul_3, 1.1111111111111112);  mul_3 = None\n    baz = torch.ops.test.baz.default(mul_4);  mul_4 = None\n    return (baz,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[qqiqyf6plgvsceqtyjt7qfdyz4oo3oixqvjcpsmypqbyyipyaif] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 320, 128, 128]), stride=(5242880, 1, 40960, 320), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.channels_last, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>", "[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}", "[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': 
'3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}", "[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[bundled_autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: 
False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug_index_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[generate_intermediate_hooks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.min_chunk_size]: 1", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_pointwise]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] 
inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733463041270428892, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_poi_fused_native_dropout_0', 'triton_poi_fused_baz_gelu_native_dropout_1'])", "time_taken_ns": 1156794294, "compile_id": "-/0/0"}
-V1205 21:30:42.424000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b87cbf8e565cbc2f0432eb2e061a553d"}
+V1206 15:18:21.903000 1500233 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bd34f8d5d775fcaf77d2c02d265bee67"}
+	{"key": "ftyqixqyigk4dzwkgsofqaadk6duwj57z5bipzhn5alvsznuf2m7", "components": ["[pvmcgcpspyiotfrhpzur22xkrqhko5vx57txiq4wghhk7pm3ocb] gm: <lambda>()\n\n\n\ndef forward(self, arg0_1):\n    mul = torch.ops.aten.mul.Tensor(arg0_1, 0.5)\n    mul_1 = torch.ops.aten.mul.Tensor(arg0_1, 0.7071067811865476);  arg0_1 = None\n    erf = torch.ops.aten.erf.default(mul_1);  mul_1 = None\n    add = torch.ops.aten.add.Tensor(erf, 1);  erf = None\n    mul_2 = torch.ops.aten.mul.Tensor(mul, add);  mul = add = None\n    inductor_seeds_default = torch.ops.prims.inductor_seeds.default(1, device(type='cuda', index=0))\n    inductor_lookup_seed_default = torch.ops.prims.inductor_lookup_seed.default(inductor_seeds_default, 0);  inductor_seeds_default = None\n    inductor_random_default = torch.ops.prims.inductor_random.default([1, 320, 128, 128], inductor_lookup_seed_default, 'rand');  inductor_lookup_seed_default = None\n    clone = torch.ops.aten.clone.default(inductor_random_default, memory_format = torch.channels_last);  inductor_random_default = None\n    gt = torch.ops.aten.gt.Scalar(clone, 0.1);  clone = None\n    mul_3 = torch.ops.aten.mul.Tensor(gt, mul_2);  gt = mul_2 = None\n    mul_4 = torch.ops.aten.mul.Tensor(mul_3, 1.1111111111111112);  mul_3 = None\n    baz = torch.ops.test.baz.default(mul_4);  mul_4 = None\n    return (baz,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[qqiqyf6plgvsceqtyjt7qfdyz4oo3oixqvjcpsmypqbyyipyaif] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 320, 128, 128]), stride=(5242880, 1, 40960, 320), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.channels_last, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': 
'3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[bundled_autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: 
False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug_index_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", 
"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[generate_intermediate_hooks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.min_chunk_size]: 1", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_pointwise]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] 
inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527101203264662, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_poi_fused_native_dropout_0', 'triton_poi_fused_baz_gelu_native_dropout_1'])", "time_taken_ns": 702837316, "compile_id": "0/0"}
+V1206 15:18:21.903000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "785c297e1e81aef3095cf23f30840768"}
 	{
 	"name": "inductor_compile",
-	"ts": 1733463042423957.8,
+	"ts": 1733527101903705.0,
 	"args": {
 	"fn_name": "compile_fx_inner",
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"is_backward": false,
 	"cache_state": "miss",
-	"cache_event_time": 1733463041262332756,
-	"key": "fijuzlwvfbi5kcpbowj6vlob7bjmsn5mpvzya47coxveorhv3z7w",
+	"cache_event_time": 1733527101197943979,
+	"key": "ftyqixqyigk4dzwkgsofqaadk6duwj57z5bipzhn5alvsznuf2m7",
 	"components": [
 	"[pvmcgcpspyiotfrhpzur22xkrqhko5vx57txiq4wghhk7pm3ocb] gm: <lambda>()\n\n\n\ndef forward(self, arg0_1):\n    mul = torch.ops.aten.mul.Tensor(arg0_1, 0.5)\n    mul_1 = torch.ops.aten.mul.Tensor(arg0_1, 0.7071067811865476);  arg0_1 = None\n    erf = torch.ops.aten.erf.default(mul_1);  mul_1 = None\n    add = torch.ops.aten.add.Tensor(erf, 1);  erf = None\n    mul_2 = torch.ops.aten.mul.Tensor(mul, add);  mul = add = None\n    inductor_seeds_default = torch.ops.prims.inductor_seeds.default(1, device(type='cuda', index=0))\n    inductor_lookup_seed_default = torch.ops.prims.inductor_lookup_seed.default(inductor_seeds_default, 0);  inductor_seeds_default = None\n    inductor_random_default = torch.ops.prims.inductor_random.default([1, 320, 128, 128], inductor_lookup_seed_default, 'rand');  inductor_lookup_seed_default = None\n    clone = torch.ops.aten.clone.default(inductor_random_default, memory_format = torch.channels_last);  inductor_random_default = None\n    gt = torch.ops.aten.gt.Scalar(clone, 0.1);  clone = None\n    mul_3 = torch.ops.aten.mul.Tensor(gt, mul_2);  gt = mul_2 = None\n    mul_4 = torch.ops.aten.mul.Tensor(mul_3, 1.1111111111111112);  mul_3 = None\n    baz = torch.ops.test.baz.default(mul_4);  mul_4 = None\n    return (baz,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
 	"[qqiqyf6plgvsceqtyjt7qfdyz4oo3oixqvjcpsmypqbyyipyaif] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 320, 128, 128]), stride=(5242880, 1, 40960, 320), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.channels_last, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
@@ -1207,10 +1231,10 @@ V1205 21:30:42.424000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True",
@@ -1475,36 +1499,36 @@ V1205 21:30:42.424000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.425000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2f52734db904a6c7c1d62d5e3b7637b7"}
+V1206 15:18:21.904000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ab4c2fac9b0442f97c51db98db8465ea"}
 	{
 	"name": "compile_fx.<locals>.fw_compiler_base",
-	"ts": 1733463042424954.0,
+	"ts": 1733527101904297.0,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.428000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "fb0a8aae91ae7820ae966a990fd2a460"}
+V1206 15:18:21.906000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "b95d05f6f59e0845136feab28a56710f"}
 	{
 	"name": "create_aot_dispatcher_function",
-	"ts": 1733463042428508.0,
+	"ts": 1733527101906249.2,
 	"args": {
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.429000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c7d11d8ed2437f6669246a41aa55120c"}
+V1206 15:18:21.906000 1500233 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7747dc628b14f97c4c4acd0ba40994f5"}
 	{
 	"name": "autograd_cache_miss",
-	"ts": 1733463041063231.8,
+	"ts": 1733527101086465.2,
 	"args": {
-	"key": "amlpkwfoc25nt37for2bdylpeluzlxktlczjzhjrbvruoqxm7tw4",
+	"key": "avcrrszv2esbpxnxz7vvqltm5eurkzwsst2nqqq3sks3p2h3bs3t",
 	"cache_state": "miss",
 	"components": [
 	"[rercb27oxulpvyy73cg2bk7544vnpfzorcoyihcqcrxerzxuiwh] aot_config: (0, True, False, False, False, [LocalSource(local_name='x', is_input=True, is_root_frame_cell=False)], True, False)",
@@ -1517,10 +1541,10 @@ V1205 21:30:42.429000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True",
@@ -1776,7 +1800,7 @@ V1205 21:30:42.429000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
 	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
 	],
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "i",
 	"cat": "dynamo_timed",
@@ -1784,20 +1808,20 @@ V1205 21:30:42.429000 1509703 torch/_dynamo/utils.py:1327] {"chromium_event": {}
 	"pid": 0,
 	"s": "p"
 	}
-V1205 21:30:42.429000 1509703 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a0e1a9fd1f1178c765862fc27606410b"}
-	{"key": "amlpkwfoc25nt37for2bdylpeluzlxktlczjzhjrbvruoqxm7tw4", "cache_state": "miss", "components": ["[rercb27oxulpvyy73cg2bk7544vnpfzorcoyihcqcrxerzxuiwh] aot_config: (0, True, False, False, False, [LocalSource(local_name='x', is_input=True, is_root_frame_cell=False)], True, False)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] grad_enabled: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] disable_amp: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] deterministic_algorithms: False", "[35fcxhrk3gooceher4yasifwf7bz6x35efasz4elzmcyrazyaai] autograd_config: <bytes>", "[nilk63auvy3gsylfusxx5tw2tokdolrygfdskikekzdsd5xiuio] gm: GraphModule()\n\n\n\ndef forward(self, L_x_ : torch.Tensor):\n    l_x_ = L_x_\n    out = torch._C._nn.gelu(l_x_);  l_x_ = None\n    input_1 = torch.nn.functional.dropout(out, 0.1, True, False);  out = None\n    out_1 = torch.ops.test.baz(input_1);  input_1 = None\n    return (out_1,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[qqiqyf6plgvsceqtyjt7qfdyz4oo3oixqvjcpsmypqbyyipyaif] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 320, 128, 128]), stride=(5242880, 1, 40960, 320), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.channels_last, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>", "[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}", "[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}", "[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[enable_auto_functionalized_v2]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[bundled_autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug_index_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[generate_intermediate_hooks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.min_chunk_size]: 1", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", 
"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_pointwise]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] 
inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "compile_id": "-/0/0"}
-V1205 21:30:42.430000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "12d29cd250a2b2f46b49688ab64a6e49"}
+V1206 15:18:21.906000 1500233 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_hash", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "af016cc50869012432c17034d2b4b2c0"}
+	{"key": "avcrrszv2esbpxnxz7vvqltm5eurkzwsst2nqqq3sks3p2h3bs3t", "cache_state": "miss", "components": ["[rercb27oxulpvyy73cg2bk7544vnpfzorcoyihcqcrxerzxuiwh] aot_config: (0, True, False, False, False, [LocalSource(local_name='x', is_input=True, is_root_frame_cell=False)], True, False)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] grad_enabled: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] disable_amp: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] deterministic_algorithms: False", "[35fcxhrk3gooceher4yasifwf7bz6x35efasz4elzmcyrazyaai] autograd_config: <bytes>", "[nilk63auvy3gsylfusxx5tw2tokdolrygfdskikekzdsd5xiuio] gm: GraphModule()\n\n\n\ndef forward(self, L_x_ : torch.Tensor):\n    l_x_ = L_x_\n    out = torch._C._nn.gelu(l_x_);  l_x_ = None\n    input_1 = torch.nn.functional.dropout(out, 0.1, True, False);  out = None\n    out_1 = torch.ops.test.baz(input_1);  input_1 = None\n    return (out_1,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[qqiqyf6plgvsceqtyjt7qfdyz4oo3oixqvjcpsmypqbyyipyaif] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 320, 128, 128]), stride=(5242880, 1, 40960, 320), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.channels_last, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] 
inductor_config[enable_auto_functionalized_v2]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[fx_graph_remote_cache]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_local_cache]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[autotune_remote_cache]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[bundled_autotune_remote_cache]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] 
inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug_index_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[generate_intermediate_hooks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.min_chunk_size]: 1", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", 
"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.autotune_pointwise]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] 
inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", 
"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "compile_id": "0/0"}
+V1206 15:18:21.907000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "210650099f3176e17ed6c62583692e36"}
 	{
 	"name": "backend_compile",
-	"ts": 1733463042430058.0,
+	"ts": 1733527101907180.8,
 	"args": {
 	"fn_name": "OutputGraph.call_user_compiler",
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"requires_subclass_dispatch": false,
 	"dispatch_mode": "inference",
 	"cache_state": "miss",
-	"cache_event_time": 1733463041063231838,
-	"key": "amlpkwfoc25nt37for2bdylpeluzlxktlczjzhjrbvruoqxm7tw4",
+	"cache_event_time": 1733527101086465244,
+	"key": "avcrrszv2esbpxnxz7vvqltm5eurkzwsst2nqqq3sks3p2h3bs3t",
 	"components": [
 	"[rercb27oxulpvyy73cg2bk7544vnpfzorcoyihcqcrxerzxuiwh] aot_config: (0, True, False, False, False, [LocalSource(local_name='x', is_input=True, is_root_frame_cell=False)], True, False)",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] grad_enabled: False",
@@ -1809,10 +1833,10 @@ V1205 21:30:42.430000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
 	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
 	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
-	"[lazrphiwnez3ji6des6gtc4njrnab7zwbev2y4zmtejzolkeonc] torch_version: <bytes>",
-	"[c3z7bmoxyo6gl5hi47v6dc7jwsl55b3asd75nr25uyengi5ah3p] system_info[device]: {'name': 'NVIDIA PG509-210'}",
-	"[4tdxvgx6mly7ezamz2ha3km5vsr3c2li4xy77jtowhccraezut3] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-8d217ee87b6d439b8ab320cb84969cbec81a3692993e42439efbd2555077cdbd-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.1'}",
-	"[vuha4ftpp7xvpssr5oo6hre5gds5luljde44u6n6dahpycrz46w] system_info[hash]: bd18ba19a73d872953f4b194a13bfda93fc87d9d602faf8815918f646b14fef9",
+	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
+	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
+	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
+	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
 	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
 	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[debug]: True",
@@ -2077,7 +2101,7 @@ V1205 21:30:42.430000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.441000 1509703 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d157edefb4de0135d87e8fdb7f2837dd"}
+V1206 15:18:21.913000 1500233 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0f04f735fae7ac445870b5956bc06ba9"}
 	
 	TREE_GUARD_MANAGER:
 	+- RootGuardManager
@@ -2088,12 +2112,12 @@ V1205 21:30:42.441000 1509703 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | +- TENSOR_MATCH: check_tensor(L['x'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 320, 128, 128], stride=[5242880, 1, 40960, 320])
 	| | +- NO_HASATTR: hasattr(L['x'], '_dynamo_dynamic_indices') == False         
 	| +- GuardManager: source=L['self'], accessed_by=DictGetItemGuardAccessor('self')
-	| | +- TYPE_MATCH: ___check_type_id(L['self'], 1272245072)                     
+	| | +- TYPE_MATCH: ___check_type_id(L['self'], 184117680)                      
 	| | +- GuardManager: source=L['self'].__dict__, accessed_by=GetGenericDictGuardAccessor
 	| | | +- GuardManager: source=L['self']._modules, accessed_by=DictGetItemGuardAccessor('_modules')
 	| | | | +- DICT_LENGTH: len(L['self']._modules) == 1                                
 	| | | | +- GuardManager: source=L['self']._modules['in_layers'], accessed_by=DictGetItemGuardAccessor('in_layers')
-	| | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['in_layers'], 112647984)
+	| | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['in_layers'], 113588064)
 	| | | | | +- LENGTH_CHECK: len(L['self']._modules['in_layers']) == 1                   
 	| | | | | +- GuardManager: source=L['self']._modules['in_layers'].__dict__, accessed_by=GetGenericDictGuardAccessor
 	| | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['self']._modules['in_layers'].__dict__)
@@ -2102,7 +2126,7 @@ V1205 21:30:42.441000 1509703 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | | | | | +- KeyManager: GuardManager: source=list(L['self']._modules['in_layers']._modules.keys())[0]
 	| | | | | | | | | +- EQUALS_MATCH: list(L['self']._modules['in_layers']._modules.keys())[0] == '0'
 	| | | | | | | | +- ValueManager: GuardManager: source=L['self']._modules['in_layers']._modules['0']
-	| | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['in_layers']._modules['0'], 113137344)
+	| | | | | | | | | +- TYPE_MATCH: ___check_type_id(L['self']._modules['in_layers']._modules['0'], 111068416)
 	| | | | | | | | | +- GuardManager: source=L['self']._modules['in_layers']._modules['0'].__dict__, accessed_by=GetGenericDictGuardAccessor
 	| | | | | | | | | | +- DICT_CONTAINS: not ___dict_contains('forward', L['self']._modules['in_layers']._modules['0'].__dict__)
 	| | | | | | | | | | +- GuardManager: source=L['self']._modules['in_layers']._modules['0'].p, accessed_by=DictGetItemGuardAccessor('p')
@@ -2115,25 +2139,25 @@ V1205 21:30:42.441000 1509703 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | | +- DICT_LENGTH: not L['self']._parameters                                   
 	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
 	| | +- GuardManager: source=G['F'], accessed_by=DictGetItemGuardAccessor('F')
-	| | | +- ID_MATCH: ___check_obj_id(G['F'], 140230317209248)                    
+	| | | +- ID_MATCH: ___check_obj_id(G['F'], 139720064954224)                    
 	| | | +- OBJECT_ALIASING: G['F'] is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
 	| | | +- GuardManager: source=G['F'].gelu, accessed_by=GetAttrGuardAccessor(gelu)
-	| | | | +- ID_MATCH: ___check_obj_id(G['F'].gelu, 140230444199872)               
+	| | | | +- ID_MATCH: ___check_obj_id(G['F'].gelu, 139721962700240)               
 	| | | +- GuardManager: source=G['F'].dropout, accessed_by=GetAttrGuardAccessor(dropout)
-	| | | | +- ID_MATCH: ___check_obj_id(G['F'].dropout, 140230284934048)            
+	| | | | +- ID_MATCH: ___check_obj_id(G['F'].dropout, 139720032587424)            
 	| | +- GuardManager: source=G['torch'], accessed_by=DictGetItemGuardAccessor('torch')
-	| | | +- ID_MATCH: ___check_obj_id(G['torch'], 140230530107648)                
+	| | | +- ID_MATCH: ___check_obj_id(G['torch'], 139724697307792)                
 	| | | +- GuardManager: source=G['torch'].ops, accessed_by=GetAttrGuardAccessor(ops)
-	| | | | +- ID_MATCH: ___check_obj_id(G['torch'].ops, 140230295423792)            
+	| | | | +- ID_MATCH: ___check_obj_id(G['torch'].ops, 139720033503648)            
 	| | | | +- GuardManager: source=G['torch'].ops.test, accessed_by=GetAttrGuardAccessor(test)
-	| | | | | +- ID_MATCH: ___check_obj_id(G['torch'].ops.test, 140228286501072)       
+	| | | | | +- ID_MATCH: ___check_obj_id(G['torch'].ops.test, 139719717837520)       
 	| | | | | +- GuardManager: source=G['torch'].ops.test.baz, accessed_by=GetAttrGuardAccessor(baz)
-	| | | | | | +- ID_MATCH: ___check_obj_id(G['torch'].ops.test.baz, 140228148062288)   
+	| | | | | | +- ID_MATCH: ___check_obj_id(G['torch'].ops.test.baz, 139710547383760)   
 	| | +- GuardManager: source=G['__builtins_dict___0'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___0')
 	| | | +- GuardManager: source=G['__builtins_dict___0']['iter'], accessed_by=DictGetItemGuardAccessor('iter')
-	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['iter'], 140230541218560)
+	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['iter'], 139724706059008)
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_modules_dot_module')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_module'], 140230387096352)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_module'], 139721387922336)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks, accessed_by=GetAttrGuardAccessor(_global_forward_hooks)
 	| | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_forward_hooks
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_hooks)
@@ -2143,32 +2167,32 @@ V1205 21:30:42.441000 1509703 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks, accessed_by=GetAttrGuardAccessor(_global_backward_pre_hooks)
 	| | | | +- DICT_LENGTH: not G['__import_torch_dot_nn_dot_modules_dot_module']._global_backward_pre_hooks
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_dropout'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_modules_dot_dropout')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_dropout'], 140230274409200)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_dropout'], 139720031577856)
 	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_dropout'].F, accessed_by=GetAttrGuardAccessor(F)
 	| | | | +- OBJECT_ALIASING: G['F'] is G['__import_torch_dot_nn_dot_modules_dot_dropout'].F
 	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_modules_dot_container'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_modules_dot_container')
-	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_container'], 140230285761312)
+	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_modules_dot_container'], 139720031411696)
 	
-V1205 21:30:42.442000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "457e05ec388443d4698cd91f11caf553"}
+V1206 15:18:21.914000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "2967ad97a7a8f2d0901c376ff0ef093c"}
 	{
 	"name": "entire_frame_compile",
-	"ts": 1733463042442267.0,
+	"ts": 1733527101913985.8,
 	"args": {
 	"fn_name": "_compile.compile_inner",
-	"compile_id": "-/0/0"
+	"compile_id": "0/0"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
 	}
-V1205 21:30:42.447000 1509703 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 17783, "joint_graph_pass_time_us": 161535, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 26956, "inductor_code_gen_cumulative_compile_time_us": 950022, "inductor_cumulative_compile_time_us": 1162996, "aot_autograd_cumulative_compile_time_us": 3306909, "dynamo_cumulative_compile_time_us": 3818533, "frame_key": "1", "co_name": "forward", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_torchinductor.py", "co_firstlineno": 11123, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 49, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 5, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": ["test::baz"], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.testing\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": 
false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
-V1205 21:30:42.447000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "dd5a14aa99f2daa726596396eebf08c4"}
+V1206 15:18:21.916000 1500233 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 11924, "joint_graph_pass_time_us": 89462, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 17215, "inductor_code_gen_cumulative_compile_time_us": 529174, "inductor_cumulative_compile_time_us": 706575, "aot_autograd_cumulative_compile_time_us": 1307090, "dynamo_cumulative_compile_time_us": 1659332, "frame_key": "1", "co_name": "forward", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_torchinductor.py", "co_firstlineno": 11123, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 49, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 5, "graph_input_count": 1, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": ["test::baz"], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._prims\", \"torch.distributions\", \"torch._decomp\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, 
\"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
+V1206 15:18:21.916000 1500233 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "188d1c02e0f0d39aa82bec9ce598f130"}
 	{
 	"name": "dynamo",
-	"ts": 1733463042447307.8,
+	"ts": 1733527101916886.5,
 	"args": {
-	"compile_id": "-/0/0",
+	"compile_id": "0/0",
 	"frame_key": "1",
 	"co_name": "forward",
 	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_torchinductor.py",
@@ -2191,10 +2215,10 @@ V1205 21:30:42.447000 1509703 torch/_dynamo/utils.py:1288] {"chromium_event": {}
 	"restart_reasons": [],
 	"dynamo_time_before_restart_s": 0.0,
 	"has_guarded_code": true,
-	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch._prims\", \"torch._refs\", \"torch.testing\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
+	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._refs\", \"torch.testing\", \"torch._prims\", \"torch.distributions\", \"torch._decomp\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": false, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
 	},
 	"ph": "E",
 	"cat": "dynamo_timed",
 	"tid": 0,
 	"pid": 0
-	}
\ No newline at end of file
+	}
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 46a1d04..87cc9cd 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -11,13 +11,12 @@ fn prefix_exists(map: &HashMap<PathBuf, String>, prefix: &str) -> bool {
 #[test]
 fn test_parse_simple() {
     let expected_files = [
-        "-_0_0_0/aot_forward_graph",
-        "-_0_0_0/dynamo_output_graph",
+        "0_0_0/aot_forward_graph",
+        "0_0_0/dynamo_output_graph",
         "index.html",
         "failures_and_restarts.html",
-        "-_0_0_0/inductor_post_grad_graph",
-        "-_0_0_0/inductor_output_code",
-        "-_0_0_0/dynamo_guards",
+        "0_0_0/inductor_post_grad_graph",
+        "0_0_0/inductor_output_code",
     ];
     // Read the test file
     // simple.log was generated from the following:
@@ -43,12 +42,12 @@ fn test_parse_simple() {
 #[test]
 fn test_parse_compilation_metrics() {
     let expected_files = [
-        "-_0_0_1/dynamo_output_graph",
-        "-_0_0_1/compilation_metrics",
-        "-_1_0_1/dynamo_output_graph",
-        "-_1_0_1/compilation_metrics",
-        "-_2_0_0/dynamo_output_graph",
-        "-_2_0_0/compilation_metrics",
+        "0_0_1/dynamo_output_graph",
+        "0_0_1/compilation_metrics",
+        "1_0_1/dynamo_output_graph",
+        "1_0_1/compilation_metrics",
+        "2_0_0/dynamo_output_graph",
+        "2_0_0/compilation_metrics",
         "index.html",
         "failures_and_restarts.html",
     ];
@@ -76,8 +75,8 @@ fn test_parse_compilation_metrics() {
 #[test]
 fn test_parse_compilation_failures() {
     let expected_files = [
-        "-_0_0_0/dynamo_output_graph",
-        "-_0_0_0/compilation_metrics",
+        "0_0_0/dynamo_output_graph",
+        "0_0_0/compilation_metrics",
         "index.html",
         "failures_and_restarts.html",
     ];
@@ -104,10 +103,10 @@ fn test_parse_compilation_failures() {
 
 #[test]
 fn test_parse_artifact() {
-    let expected_files = ["-_0_0_0/fx_graph_cache_hash", "index.html"];
+    let expected_files = ["0_0_0/fx_graph_cache_hash", "index.html"];
     // Read the test file
     // artifacts.log was generated from the following:
-    // NOTE: test output looks nothing like artifacts.log
+    // NOTE: this test command looks wrong and does not produce anything close to artifacts.log
     // TORCH_TRACE=~/trace_logs/test python test/inductor/test_torchinductor.py  -k TORCH_TRACE=~/trace_logs/comp_metrics python test/dynamo/test_misc.py -k test_graph_break_compilation_metrics_on_failure
     let path = Path::new("tests/inputs/artifacts.log").to_path_buf();
     let config = tlparse::ParseConfig {
@@ -154,9 +153,9 @@ fn test_parse_chromium_event() {
 #[test]
 fn test_cache_hit_miss() {
     let expected_files = [
-        "-_1_0_0/fx_graph_cache_miss_33.json",
-        "-_1_0_0/fx_graph_cache_miss_9.json",
-        "-_1_0_0/fx_graph_cache_hit_20.json",
+        "1_0_0/fx_graph_cache_miss_33.json",
+        "1_0_0/fx_graph_cache_miss_9.json",
+        "1_0_0/fx_graph_cache_hit_20.json",
         "index.html",
     ];
     // Generated via TORCH_TRACE=~/trace_logs/test python test/inductor/test_codecache.py -k test_flex_attention_caching